diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py index cc3a06b7b..4cea9d970 100644 --- a/docs/openapi_generator/pyopenapi/operations.py +++ b/docs/openapi_generator/pyopenapi/operations.py @@ -8,7 +8,6 @@ import collections.abc import enum import inspect import typing -import uuid from dataclasses import dataclass from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union @@ -16,12 +15,7 @@ from llama_stack.apis.version import LLAMA_STACK_API_VERSION from termcolor import colored -from ..strong_typing.inspection import ( - get_signature, - is_type_enum, - is_type_optional, - unwrap_optional_type, -) +from ..strong_typing.inspection import get_signature def split_prefix( @@ -113,9 +107,6 @@ class EndpointOperation: def get_route(self) -> str: if self.route is not None: - assert ( - "_" not in self.route - ), f"route should not contain underscores: {self.route}" return "/".join(["", LLAMA_STACK_API_VERSION, self.route.lstrip("/")]) route_parts = ["", LLAMA_STACK_API_VERSION, self.name] @@ -265,42 +256,16 @@ def get_endpoint_operations( f"parameter '{param_name}' in function '{func_name}' has no type annotation" ) - if is_type_optional(param_type): - inner_type: type = unwrap_optional_type(param_type) - else: - inner_type = param_type - - if prefix == "get" and ( - inner_type is bool - or inner_type is int - or inner_type is float - or inner_type is str - or inner_type is uuid.UUID - or is_type_enum(inner_type) - ): - if parameter.kind == inspect.Parameter.POSITIONAL_ONLY: - if route_params is not None and param_name not in route_params: - raise ValidationError( - f"positional parameter '{param_name}' absent from user-defined route '{route}' for function '{func_name}'" - ) - - # simple type maps to route path element, e.g. 
/study/{uuid}/{version} + if prefix in ["get", "delete"]: + if route_params is not None and param_name in route_params: path_params.append((param_name, param_type)) else: - if route_params is not None and param_name in route_params: - raise ValidationError( - f"query parameter '{param_name}' found in user-defined route '{route}' for function '{func_name}'" - ) - - # simple type maps to key=value pair in query string query_params.append((param_name, param_type)) else: if route_params is not None and param_name in route_params: - raise ValidationError( - f"user-defined route '{route}' for function '{func_name}' has parameter '{param_name}' of composite type: {param_type}" - ) - - request_params.append((param_name, param_type)) + path_params.append((param_name, param_type)) + else: + request_params.append((param_name, param_type)) # check if function has explicit return type if signature.return_annotation is inspect.Signature.empty: @@ -335,19 +300,18 @@ def get_endpoint_operations( response_type = process_type(return_type) - # set HTTP request method based on type of request and presence of payload - if not request_params: if prefix in ["delete", "remove"]: http_method = HTTPMethod.DELETE - else: + elif prefix == "post": + http_method = HTTPMethod.POST + elif prefix == "get": http_method = HTTPMethod.GET - else: - if prefix == "set": + elif prefix == "set": http_method = HTTPMethod.PUT elif prefix == "update": http_method = HTTPMethod.PATCH else: - http_method = HTTPMethod.POST + raise ValidationError(f"unknown prefix {prefix}") result.append( EndpointOperation( diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html index 3f74a79cf..2db33c87a 100644 --- a/docs/resources/llama-stack-spec.html +++ b/docs/resources/llama-stack-spec.html @@ -29,7 +29,76 @@ } ], "paths": { - "/v1/datasetio/append-rows": { + "/v1/datasetio/rows": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + 
"schema": { + "$ref": "#/components/schemas/PaginatedRowsResult" + } + } + } + } + }, + "tags": [ + "DatasetIO" + ], + "parameters": [ + { + "name": "dataset_id", + "in": "query", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "rows_in_page", + "in": "query", + "required": true, + "schema": { + "type": "integer" + } + }, + { + "name": "page_token", + "in": "query", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "filter_condition", + "in": "query", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ] + }, "post": { "responses": { "200": { @@ -323,7 +392,7 @@ } } }, - "/v1/agents/create": { + "/v1/agents": { "post": { "responses": { "200": { @@ -372,7 +441,7 @@ } } }, - "/v1/agents/session/create": { + "/v1/agents/{agent_id}/session": { "post": { "responses": { "200": { @@ -390,6 +459,14 @@ "Agents" ], "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, { "name": "X-LlamaStack-Provider-Data", "in": "header", @@ -421,7 +498,7 @@ } } }, - "/v1/agents/turn/create": { + "/v1/agents/{agent_id}/session/{session_id}/turn": { "post": { "responses": { "200": { @@ -446,6 +523,22 @@ "Agents" ], "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "session_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, { "name": "X-LlamaStack-Provider-Data", "in": 
"header", @@ -477,8 +570,8 @@ } } }, - "/v1/agents/delete": { - "post": { + "/v1/agents/{agent_id}": { + "delete": { "responses": { "200": { "description": "OK" @@ -488,6 +581,14 @@ "Agents" ], "parameters": [ + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, { "name": "X-LlamaStack-Provider-Data", "in": "header", @@ -506,30 +607,54 @@ "type": "string" } } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DeleteAgentsRequest" - } - } - }, - "required": true - } + ] } }, - "/v1/agents/session/delete": { - "post": { + "/v1/agents/{agent_id}/session/{session_id}": { + "get": { "responses": { "200": { - "description": "OK" + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Session" + } + } + } } }, "tags": [ "Agents" ], "parameters": [ + { + "name": "session_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "turn_ids", + "in": "query", + "required": false, + "schema": { + "type": "array", + "items": { + "type": "string" + } + } + }, { "name": "X-LlamaStack-Provider-Data", "in": "header", @@ -548,17 +673,53 @@ "type": "string" } } + ] + }, + "delete": { + "responses": { + "200": { + "description": "OK" + } + }, + "tags": [ + "Agents" ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DeleteAgentsSessionRequest" - } + "parameters": [ + { + "name": "session_id", + "in": "path", + "required": true, + "schema": { + "type": "string" } }, - "required": true - } + { + "name": "agent_id", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the 
adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ] } }, "/v1/inference/embeddings": { @@ -659,72 +820,7 @@ } } }, - "/v1/agents/session/get": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Session" - } - } - } - } - }, - "tags": [ - "Agents" - ], - "parameters": [ - { - "name": "agent_id", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "session_id", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. 
This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GetAgentsSessionRequest" - } - } - }, - "required": true - } - } - }, - "/v1/agents/step/get": { + "/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}": { "get": { "responses": { "200": { @@ -744,7 +840,7 @@ "parameters": [ { "name": "agent_id", - "in": "query", + "in": "path", "required": true, "schema": { "type": "string" @@ -752,7 +848,7 @@ }, { "name": "session_id", - "in": "query", + "in": "path", "required": true, "schema": { "type": "string" @@ -760,7 +856,7 @@ }, { "name": "turn_id", - "in": "query", + "in": "path", "required": true, "schema": { "type": "string" @@ -768,7 +864,7 @@ }, { "name": "step_id", - "in": "query", + "in": "path", "required": true, "schema": { "type": "string" @@ -795,7 +891,7 @@ ] } }, - "/v1/agents/turn/get": { + "/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}": { "get": { "responses": { "200": { @@ -815,7 +911,7 @@ "parameters": [ { "name": "agent_id", - "in": "query", + "in": "path", "required": true, "schema": { "type": "string" @@ -823,7 +919,7 @@ }, { "name": "session_id", - "in": "query", + "in": "path", "required": true, "schema": { "type": "string" @@ -831,7 +927,7 @@ }, { "name": "turn_id", - "in": "query", + "in": "path", "required": true, "schema": { "type": "string" @@ -858,7 +954,7 @@ ] } }, - "/v1/datasets/get": { + "/v1/datasets/{dataset_id}": { "get": { "responses": { "200": { @@ -885,7 +981,45 @@ "parameters": [ { "name": "dataset_id", - "in": "query", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": 
"string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ] + }, + "delete": { + "responses": { + "200": { + "description": "OK" + } + }, + "tags": [ + "Datasets" + ], + "parameters": [ + { + "name": "dataset_id", + "in": "path", "required": true, "schema": { "type": "string" @@ -912,7 +1046,7 @@ ] } }, - "/v1/eval-tasks/get": { + "/v1/eval-tasks/{eval_task_id}": { "get": { "responses": { "200": { @@ -938,8 +1072,8 @@ ], "parameters": [ { - "name": "name", - "in": "query", + "name": "eval_task_id", + "in": "path", "required": true, "schema": { "type": "string" @@ -966,7 +1100,7 @@ ] } }, - "/v1/memory-banks/get": { + "/v1/memory-banks/{memory_bank_id}": { "get": { "responses": { "200": { @@ -993,7 +1127,45 @@ "parameters": [ { "name": "memory_bank_id", - "in": "query", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. 
This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ] + }, + "delete": { + "responses": { + "200": { + "description": "OK" + } + }, + "tags": [ + "MemoryBanks" + ], + "parameters": [ + { + "name": "memory_bank_id", + "in": "path", "required": true, "schema": { "type": "string" @@ -1020,7 +1192,7 @@ ] } }, - "/v1/models/get": { + "/v1/models/{model_id}": { "get": { "responses": { "200": { @@ -1046,8 +1218,8 @@ ], "parameters": [ { - "name": "identifier", - "in": "query", + "name": "model_id", + "in": "path", "required": true, "schema": { "type": "string" @@ -1072,58 +1244,25 @@ } } ] - } - }, - "/v1/datasetio/get-rows-paginated": { - "get": { + }, + "delete": { "responses": { "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/PaginatedRowsResult" - } - } - } + "description": "OK" } }, "tags": [ - "DatasetIO" + "Models" ], "parameters": [ { - "name": "dataset_id", - "in": "query", + "name": "model_id", + "in": "path", "required": true, "schema": { "type": "string" } }, - { - "name": "rows_in_page", - "in": "query", - "required": true, - "schema": { - "type": "integer" - } - }, - { - "name": "page_token", - "in": "query", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "filter_condition", - "in": "query", - "required": false, - "schema": { - "type": "string" - } - }, { "name": "X-LlamaStack-Provider-Data", "in": "header", @@ -1145,7 +1284,7 @@ ] } }, - "/v1/scoring-functions/get": { + "/v1/scoring-functions/{scoring_fn_id}": { "get": { "responses": { "200": { @@ -1172,7 +1311,7 @@ "parameters": [ { "name": "scoring_fn_id", - "in": "query", + "in": "path", "required": true, "schema": { "type": "string" @@ -1199,7 +1338,7 @@ ] } }, - "/v1/shields/get": { + "/v1/shields/{identifier}": { "get": { "responses": { "200": { @@ -1226,7 +1365,7 @@ "parameters": [ { "name": "identifier", - "in": "query", + 
"in": "path", "required": true, "schema": { "type": "string" @@ -1253,75 +1392,7 @@ ] } }, - "/v1/telemetry/get-span-tree": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/SpanWithStatus" - } - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [ - { - "name": "span_id", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "max_depth", - "in": "query", - "required": false, - "schema": { - "type": "integer" - } - }, - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GetSpanTreeRequest" - } - } - }, - "required": true - } - } - }, - "/v1/tools/get": { + "/v1/tools/{tool_name}": { "get": { "responses": { "200": { @@ -1341,7 +1412,7 @@ "parameters": [ { "name": "tool_name", - "in": "query", + "in": "path", "required": true, "schema": { "type": "string" @@ -1368,7 +1439,7 @@ ] } }, - "/v1/toolgroups/get": { + "/v1/toolgroups/{toolgroup_id}": { "get": { "responses": { "200": { @@ -1388,7 +1459,46 @@ "parameters": [ { "name": "toolgroup_id", - "in": "query", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { 
+ "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ] + }, + "delete": { + "responses": { + "200": { + "description": "OK" + } + }, + "tags": [ + "ToolGroups" + ], + "summary": "Unregister a tool group", + "parameters": [ + { + "name": "toolgroup_id", + "in": "path", "required": true, "schema": { "type": "string" @@ -1529,9 +1639,9 @@ "200": { "description": "OK", "content": { - "application/jsonl": { + "application/json": { "schema": { - "$ref": "#/components/schemas/PostTrainingJob" + "$ref": "#/components/schemas/ListPostTrainingJobsResponse" } } } @@ -1693,7 +1803,7 @@ } } }, - "/v1/eval/job/cancel": { + "/v1/eval/jobs/cancel": { "post": { "responses": { "200": { @@ -1735,7 +1845,7 @@ } } }, - "/v1/eval/job/result": { + "/v1/eval/jobs/{job_id}/result": { "get": { "responses": { "200": { @@ -1754,15 +1864,15 @@ ], "parameters": [ { - "name": "task_id", - "in": "query", + "name": "job_id", + "in": "path", "required": true, "schema": { "type": "string" } }, { - "name": "job_id", + "name": "task_id", "in": "query", "required": true, "schema": { @@ -1790,7 +1900,7 @@ ] } }, - "/v1/eval/job/status": { + "/v1/eval/jobs/{job_id}": { "get": { "responses": { "200": { @@ -1816,15 +1926,15 @@ ], "parameters": [ { - "name": "task_id", - "in": "query", + "name": "job_id", + "in": "path", "required": true, "schema": { "type": "string" } }, { - "name": "job_id", + "name": "task_id", "in": "query", "required": true, "schema": { @@ -1852,15 +1962,15 @@ ] } }, - "/v1/datasets/list": { + "/v1/datasets": { "get": { "responses": { "200": { "description": "OK", "content": { - "application/jsonl": { + "application/json": { "schema": { - "$ref": "#/components/schemas/Dataset" + "$ref": "#/components/schemas/ListDatasetsResponse" } } } @@ -1889,17 +1999,57 @@ 
} } ] + }, + "post": { + "responses": { + "200": { + "description": "OK" + } + }, + "tags": [ + "Datasets" + ], + "parameters": [ + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RegisterDatasetRequest" + } + } + }, + "required": true + } } }, - "/v1/eval-tasks/list": { + "/v1/eval-tasks": { "get": { "responses": { "200": { "description": "OK", "content": { - "application/jsonl": { + "application/json": { "schema": { - "$ref": "#/components/schemas/EvalTask" + "$ref": "#/components/schemas/ListEvalTasksResponse" } } } @@ -1928,17 +2078,57 @@ } } ] + }, + "post": { + "responses": { + "200": { + "description": "OK" + } + }, + "tags": [ + "EvalTasks" + ], + "parameters": [ + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. 
This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RegisterEvalTaskRequest" + } + } + }, + "required": true + } } }, - "/v1/memory-banks/list": { + "/v1/memory-banks": { "get": { "responses": { "200": { "description": "OK", "content": { - "application/jsonl": { + "application/json": { "schema": { - "$ref": "#/components/schemas/MemoryBank" + "$ref": "#/components/schemas/ListMemoryBanksResponse" } } } @@ -1967,15 +2157,112 @@ } } ] + }, + "post": { + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/VectorMemoryBank" + }, + { + "$ref": "#/components/schemas/KeyValueMemoryBank" + }, + { + "$ref": "#/components/schemas/KeywordMemoryBank" + }, + { + "$ref": "#/components/schemas/GraphMemoryBank" + } + ] + } + } + } + } + }, + "tags": [ + "MemoryBanks" + ], + "parameters": [ + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. 
This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RegisterMemoryBankRequest" + } + } + }, + "required": true + } } }, - "/v1/models/list": { + "/v1/models": { "get": { "responses": { "200": { "description": "OK", "content": { - "application/jsonl": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListModelsResponse" + } + } + } + } + }, + "tags": [ + "Models" + ], + "parameters": [ + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ] + }, + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { "schema": { "$ref": "#/components/schemas/Model" } @@ -2005,7 +2292,17 @@ "type": "string" } } - ] + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RegisterModelRequest" + } + } + }, + "required": true + } } }, "/v1/providers/list": { @@ -2016,10 +2313,7 @@ "content": { "application/json": { "schema": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ProviderInfo" - } + "$ref": "#/components/schemas/ListProvidersResponse" } } } @@ -2096,7 +2390,7 @@ } }, "/v1/tool-runtime/list-tools": { - "post": { + "get": { "responses": { "200": { "description": "OK", @@ -2121,6 +2415,14 @@ "type": "string" } }, + { + "name": "mcp_endpoint", + "in": "query", + "required": false, + "schema": { + "$ref": 
"#/components/schemas/URL" + } + }, { "name": "X-LlamaStack-Provider-Data", "in": "header", @@ -2139,28 +2441,18 @@ "type": "string" } } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ListRuntimeToolsRequest" - } - } - }, - "required": true - } + ] } }, - "/v1/scoring-functions/list": { + "/v1/scoring-functions": { "get": { "responses": { "200": { "description": "OK", "content": { - "application/jsonl": { + "application/json": { "schema": { - "$ref": "#/components/schemas/ScoringFn" + "$ref": "#/components/schemas/ListScoringFunctionsResponse" } } } @@ -2189,15 +2481,92 @@ } } ] + }, + "post": { + "responses": { + "200": { + "description": "OK" + } + }, + "tags": [ + "ScoringFunctions" + ], + "parameters": [ + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. 
This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RegisterScoringFunctionRequest" + } + } + }, + "required": true + } } }, - "/v1/shields/list": { + "/v1/shields": { "get": { "responses": { "200": { "description": "OK", "content": { - "application/jsonl": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListShieldsResponse" + } + } + } + } + }, + "tags": [ + "Shields" + ], + "parameters": [ + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ] + }, + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { "schema": { "$ref": "#/components/schemas/Shield" } @@ -2227,18 +2596,28 @@ "type": "string" } } - ] + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RegisterShieldRequest" + } + } + }, + "required": true + } } }, - "/v1/toolgroups/list": { + "/v1/toolgroups": { "get": { "responses": { "200": { "description": "OK", "content": { - "application/jsonl": { + "application/json": { "schema": { - "$ref": "#/components/schemas/ToolGroup" + "$ref": "#/components/schemas/ListToolGroupsResponse" } } } @@ -2268,17 +2647,58 @@ } } ] + }, + "post": { + "responses": { + "200": { + "description": "OK" + } + }, + "tags": [ + "ToolGroups" + ], + "summary": "Register a tool group", + "parameters": [ + { + "name": "X-LlamaStack-Provider-Data", + 
"in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RegisterToolGroupRequest" + } + } + }, + "required": true + } } }, - "/v1/tools/list": { + "/v1/tools": { "get": { "responses": { "200": { "description": "OK", "content": { - "application/jsonl": { + "application/json": { "schema": { - "$ref": "#/components/schemas/Tool" + "$ref": "#/components/schemas/ListToolsResponse" } } } @@ -2290,7 +2710,7 @@ "summary": "List tools with optional tool group", "parameters": [ { - "name": "tool_group_id", + "name": "toolgroup_id", "in": "query", "required": false, "schema": { @@ -2458,6 +2878,58 @@ } } }, + "/v1/telemetry/query-span-tree": { + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/SpanWithStatus" + } + } + } + } + } + }, + "tags": [ + "Telemetry" + ], + "parameters": [ + { + "name": "X-LlamaStack-Provider-Data", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "X-LlamaStack-Client-Version", + "in": "header", + "description": "Version of the client making the request. 
This is used to ensure that the client and server are compatible.", + "required": false, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/QuerySpanTreeRequest" + } + } + }, + "required": true + } + } + }, "/v1/telemetry/query-spans": { "post": { "responses": { @@ -2556,336 +3028,7 @@ } } }, - "/v1/datasets/register": { - "post": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "Datasets" - ], - "parameters": [ - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RegisterDatasetRequest" - } - } - }, - "required": true - } - } - }, - "/v1/eval-tasks/register": { - "post": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "EvalTasks" - ], - "parameters": [ - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. 
This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RegisterEvalTaskRequest" - } - } - }, - "required": true - } - } - }, - "/v1/memory-banks/register": { - "post": { - "responses": { - "200": { - "description": "", - "content": { - "application/json": { - "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/VectorMemoryBank" - }, - { - "$ref": "#/components/schemas/KeyValueMemoryBank" - }, - { - "$ref": "#/components/schemas/KeywordMemoryBank" - }, - { - "$ref": "#/components/schemas/GraphMemoryBank" - } - ] - } - } - } - } - }, - "tags": [ - "MemoryBanks" - ], - "parameters": [ - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. 
This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RegisterMemoryBankRequest" - } - } - }, - "required": true - } - } - }, - "/v1/models/register": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Model" - } - } - } - } - }, - "tags": [ - "Models" - ], - "parameters": [ - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RegisterModelRequest" - } - } - }, - "required": true - } - } - }, - "/v1/scoring-functions/register": { - "post": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "ScoringFunctions" - ], - "parameters": [ - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. 
This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RegisterScoringFunctionRequest" - } - } - }, - "required": true - } - } - }, - "/v1/shields/register": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Shield" - } - } - } - } - }, - "tags": [ - "Shields" - ], - "parameters": [ - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RegisterShieldRequest" - } - } - }, - "required": true - } - } - }, - "/v1/toolgroups/register": { - "post": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "ToolGroups" - ], - "summary": "Register a tool group", - "parameters": [ - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. 
This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RegisterToolGroupRequest" - } - } - }, - "required": true - } - } - }, - "/v1/eval/run-eval": { + "/v1/eval/run": { "post": { "responses": { "200": { @@ -3221,175 +3364,6 @@ } } }, - "/v1/datasets/unregister": { - "post": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "Datasets" - ], - "parameters": [ - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/UnregisterDatasetRequest" - } - } - }, - "required": true - } - } - }, - "/v1/memory-banks/unregister": { - "post": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "MemoryBanks" - ], - "parameters": [ - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. 
This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/UnregisterMemoryBankRequest" - } - } - }, - "required": true - } - } - }, - "/v1/models/unregister": { - "post": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "Models" - ], - "parameters": [ - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/UnregisterModelRequest" - } - } - }, - "required": true - } - } - }, - "/v1/toolgroups/unregister": { - "post": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "ToolGroups" - ], - "summary": "Unregister a tool group", - "parameters": [ - { - "name": "X-LlamaStack-Provider-Data", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-Client-Version", - "in": "header", - "description": "Version of the client making the request. 
This is used to ensure that the client and server are compatible.", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/UnregisterToolGroupRequest" - } - } - }, - "required": true - } - } - }, "/v1/version": { "get": { "responses": { @@ -3512,6 +3486,20 @@ "tool_calls" ] }, + "GreedySamplingStrategy": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "greedy", + "default": "greedy" + } + }, + "additionalProperties": false, + "required": [ + "type" + ] + }, "ImageContentItem": { "type": "object", "properties": { @@ -3579,20 +3567,17 @@ "type": "object", "properties": { "strategy": { - "$ref": "#/components/schemas/SamplingStrategy", - "default": "greedy" - }, - "temperature": { - "type": "number", - "default": 0.0 - }, - "top_p": { - "type": "number", - "default": 0.95 - }, - "top_k": { - "type": "integer", - "default": 0 + "oneOf": [ + { + "$ref": "#/components/schemas/GreedySamplingStrategy" + }, + { + "$ref": "#/components/schemas/TopPSamplingStrategy" + }, + { + "$ref": "#/components/schemas/TopKSamplingStrategy" + } + ] }, "max_tokens": { "type": "integer", @@ -3608,14 +3593,6 @@ "strategy" ] }, - "SamplingStrategy": { - "type": "string", - "enum": [ - "greedy", - "top_p", - "top_k" - ] - }, "StopReason": { "type": "string", "enum": [ @@ -3869,6 +3846,45 @@ "content" ] }, + "TopKSamplingStrategy": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "top_k", + "default": "top_k" + }, + "top_k": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "top_k" + ] + }, + "TopPSamplingStrategy": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "top_p", + "default": "top_p" + }, + "temperature": { + "type": "number" + }, + "top_p": { + "type": "number", + "default": 0.95 + } + }, + "additionalProperties": false, + 
"required": [ + "type" + ] + }, "URL": { "type": "object", "properties": { @@ -4631,16 +4647,12 @@ "CreateAgentSessionRequest": { "type": "object", "properties": { - "agent_id": { - "type": "string" - }, "session_name": { "type": "string" } }, "additionalProperties": false, "required": [ - "agent_id", "session_name" ] }, @@ -4659,12 +4671,6 @@ "CreateAgentTurnRequest": { "type": "object", "properties": { - "agent_id": { - "type": "string" - }, - "session_id": { - "type": "string" - }, "messages": { "type": "array", "items": { @@ -4725,8 +4731,6 @@ }, "additionalProperties": false, "required": [ - "agent_id", - "session_id", "messages" ] }, @@ -5266,34 +5270,6 @@ "error" ] }, - "DeleteAgentsRequest": { - "type": "object", - "properties": { - "agent_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "agent_id" - ] - }, - "DeleteAgentsSessionRequest": { - "type": "object", - "properties": { - "agent_id": { - "type": "string" - }, - "session_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "agent_id", - "session_id" - ] - }, "EmbeddingsRequest": { "type": "object", "properties": { @@ -5701,18 +5677,6 @@ "aggregated_results" ] }, - "GetAgentsSessionRequest": { - "type": "object", - "properties": { - "turn_ids": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "additionalProperties": false - }, "GraphMemoryBank": { "type": "object", "properties": { @@ -6431,85 +6395,6 @@ ], "title": "A safety shield resource that can be used to check content" }, - "GetSpanTreeRequest": { - "type": "object", - "properties": { - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "additionalProperties": false - }, - "SpanStatus": { - "type": "string", - "enum": [ - "ok", - "error" - ] - }, - "SpanWithStatus": { - "type": "object", - "properties": { - "span_id": { - "type": "string" - }, - "trace_id": { - "type": "string" - }, - "parent_span_id": { - "type": 
"string" - }, - "name": { - "type": "string" - }, - "start_time": { - "type": "string", - "format": "date-time" - }, - "end_time": { - "type": "string", - "format": "date-time" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "status": { - "$ref": "#/components/schemas/SpanStatus" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ] - }, "Tool": { "type": "object", "properties": { @@ -6735,16 +6620,28 @@ ], "title": "Status of a finetuning job." }, - "PostTrainingJob": { + "ListPostTrainingJobsResponse": { "type": "object", "properties": { - "job_uuid": { - "type": "string" + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid" + ] + } } }, "additionalProperties": false, "required": [ - "job_uuid" + "data" ] }, "HealthInfo": { @@ -6901,17 +6798,77 @@ "JobCancelRequest": { "type": "object", "properties": { - "task_id": { + "job_id": { "type": "string" }, - "job_id": { + "task_id": { "type": "string" } }, "additionalProperties": false, "required": [ - "task_id", - "job_id" + "job_id", + "task_id" + ] + }, + "ListDatasetsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Dataset" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ] + }, + "ListEvalTasksResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/EvalTask" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ] + }, + "ListMemoryBanksResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + 
"items": { + "$ref": "#/components/schemas/MemoryBank" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ] + }, + "ListModelsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Model" + } + } + }, + "additionalProperties": false, + "required": [ + "data" ] }, "ProviderInfo": { @@ -6930,6 +6887,21 @@ "provider_type" ] }, + "ListProvidersResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ProviderInfo" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ] + }, "RouteInfo": { "type": "object", "properties": { @@ -6953,14 +6925,65 @@ "provider_types" ] }, - "ListRuntimeToolsRequest": { + "ListScoringFunctionsResponse": { "type": "object", "properties": { - "mcp_endpoint": { - "$ref": "#/components/schemas/URL" + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ScoringFn" + } } }, - "additionalProperties": false + "additionalProperties": false, + "required": [ + "data" + ] + }, + "ListShieldsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Shield" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ] + }, + "ListToolGroupsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolGroup" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ] + }, + "ListToolsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Tool" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ] }, "LogSeverity": { "type": "string", @@ -7083,6 +7106,13 @@ "name" ] }, + "SpanStatus": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, "StructuredLogEvent": { "type": "object", "properties": { @@ 
-7467,6 +7497,18 @@ "logger_config" ] }, + "PostTrainingJob": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid" + ] + }, "QueryDocumentsRequest": { "type": "object", "properties": { @@ -7547,6 +7589,87 @@ "scores" ] }, + "QuerySpanTreeRequest": { + "type": "object", + "properties": { + "span_id": { + "type": "string" + }, + "attributes_to_return": { + "type": "array", + "items": { + "type": "string" + } + }, + "max_depth": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "span_id" + ] + }, + "SpanWithStatus": { + "type": "object", + "properties": { + "span_id": { + "type": "string" + }, + "trace_id": { + "type": "string" + }, + "parent_span_id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "start_time": { + "type": "string", + "format": "date-time" + }, + "end_time": { + "type": "string", + "format": "date-time" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "status": { + "$ref": "#/components/schemas/SpanStatus" + } + }, + "additionalProperties": false, + "required": [ + "span_id", + "trace_id", + "name", + "start_time" + ] + }, "QueryCondition": { "type": "object", "properties": { @@ -8606,54 +8729,6 @@ ], "title": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold." 
}, - "UnregisterDatasetRequest": { - "type": "object", - "properties": { - "dataset_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "dataset_id" - ] - }, - "UnregisterMemoryBankRequest": { - "type": "object", - "properties": { - "memory_bank_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "memory_bank_id" - ] - }, - "UnregisterModelRequest": { - "type": "object", - "properties": { - "model_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "model_id" - ] - }, - "UnregisterToolGroupRequest": { - "type": "object", - "properties": { - "tool_group_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "tool_group_id" - ] - }, "VersionInfo": { "type": "object", "properties": { @@ -8855,14 +8930,6 @@ { "name": "Datasets" }, - { - "name": "DeleteAgentsRequest", - "description": "" - }, - { - "name": "DeleteAgentsSessionRequest", - "description": "" - }, { "name": "EfficiencyConfig", "description": "" @@ -8893,14 +8960,6 @@ "name": "EvaluateRowsRequest", "description": "" }, - { - "name": "GetAgentsSessionRequest", - "description": "" - }, - { - "name": "GetSpanTreeRequest", - "description": "" - }, { "name": "GraphMemoryBank", "description": "" @@ -8909,6 +8968,10 @@ "name": "GraphMemoryBankParams", "description": "" }, + { + "name": "GreedySamplingStrategy", + "description": "" + }, { "name": "HealthInfo", "description": "" @@ -8980,8 +9043,44 @@ "description": "" }, { - "name": "ListRuntimeToolsRequest", - "description": "" + "name": "ListDatasetsResponse", + "description": "" + }, + { + "name": "ListEvalTasksResponse", + "description": "" + }, + { + "name": "ListMemoryBanksResponse", + "description": "" + }, + { + "name": "ListModelsResponse", + "description": "" + }, + { + "name": "ListPostTrainingJobsResponse", + "description": "" + }, + { + "name": "ListProvidersResponse", + "description": "" + }, + { + "name": 
"ListScoringFunctionsResponse", + "description": "" + }, + { + "name": "ListShieldsResponse", + "description": "" + }, + { + "name": "ListToolGroupsResponse", + "description": "" + }, + { + "name": "ListToolsResponse", + "description": "" }, { "name": "LogEventRequest", @@ -9095,6 +9194,10 @@ "name": "QueryDocumentsResponse", "description": "" }, + { + "name": "QuerySpanTreeRequest", + "description": "" + }, { "name": "QuerySpansRequest", "description": "" @@ -9166,10 +9269,6 @@ "name": "SamplingParams", "description": "" }, - { - "name": "SamplingStrategy", - "description": "" - }, { "name": "SaveSpansToDatasetRequest", "description": "" @@ -9351,6 +9450,14 @@ { "name": "ToolRuntime" }, + { + "name": "TopKSamplingStrategy", + "description": "" + }, + { + "name": "TopPSamplingStrategy", + "description": "" + }, { "name": "Trace", "description": "" @@ -9367,22 +9474,6 @@ "name": "URL", "description": "" }, - { - "name": "UnregisterDatasetRequest", - "description": "" - }, - { - "name": "UnregisterMemoryBankRequest", - "description": "" - }, - { - "name": "UnregisterModelRequest", - "description": "" - }, - { - "name": "UnregisterToolGroupRequest", - "description": "" - }, { "name": "UnstructuredLogEvent", "description": "" @@ -9479,18 +9570,15 @@ "DataConfig", "Dataset", "DatasetFormat", - "DeleteAgentsRequest", - "DeleteAgentsSessionRequest", "EfficiencyConfig", "EmbeddingsRequest", "EmbeddingsResponse", "EvalTask", "EvaluateResponse", "EvaluateRowsRequest", - "GetAgentsSessionRequest", - "GetSpanTreeRequest", "GraphMemoryBank", "GraphMemoryBankParams", + "GreedySamplingStrategy", "HealthInfo", "ImageContentItem", "ImageDelta", @@ -9507,7 +9595,16 @@ "KeywordMemoryBank", "KeywordMemoryBankParams", "LLMAsJudgeScoringFnParams", - "ListRuntimeToolsRequest", + "ListDatasetsResponse", + "ListEvalTasksResponse", + "ListMemoryBanksResponse", + "ListModelsResponse", + "ListPostTrainingJobsResponse", + "ListProvidersResponse", + "ListScoringFunctionsResponse", + 
"ListShieldsResponse", + "ListToolGroupsResponse", + "ListToolsResponse", "LogEventRequest", "LogSeverity", "LoraFinetuningConfig", @@ -9533,6 +9630,7 @@ "QueryConditionOp", "QueryDocumentsRequest", "QueryDocumentsResponse", + "QuerySpanTreeRequest", "QuerySpansRequest", "QueryTracesRequest", "RegexParserScoringFnParams", @@ -9550,7 +9648,6 @@ "RunShieldResponse", "SafetyViolation", "SamplingParams", - "SamplingStrategy", "SaveSpansToDatasetRequest", "ScoreBatchRequest", "ScoreBatchResponse", @@ -9591,14 +9688,12 @@ "ToolPromptFormat", "ToolResponse", "ToolResponseMessage", + "TopKSamplingStrategy", + "TopPSamplingStrategy", "Trace", "TrainingConfig", "Turn", "URL", - "UnregisterDatasetRequest", - "UnregisterMemoryBankRequest", - "UnregisterModelRequest", - "UnregisterToolGroupRequest", "UnstructuredLogEvent", "UserMessage", "VectorMemoryBank", diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml index 2afb8e375..ab27e4f3d 100644 --- a/docs/resources/llama-stack-spec.yaml +++ b/docs/resources/llama-stack-spec.yaml @@ -584,19 +584,14 @@ components: CreateAgentSessionRequest: additionalProperties: false properties: - agent_id: - type: string session_name: type: string required: - - agent_id - session_name type: object CreateAgentTurnRequest: additionalProperties: false properties: - agent_id: - type: string documents: items: additionalProperties: false @@ -622,8 +617,6 @@ components: - $ref: '#/components/schemas/UserMessage' - $ref: '#/components/schemas/ToolResponseMessage' type: array - session_id: - type: string stream: type: boolean toolgroups: @@ -631,8 +624,6 @@ components: $ref: '#/components/schemas/AgentTool' type: array required: - - agent_id - - session_id - messages type: object DPOAlignmentConfig: @@ -720,25 +711,6 @@ components: - instruct - dialog type: string - DeleteAgentsRequest: - additionalProperties: false - properties: - agent_id: - type: string - required: - - agent_id - type: object - 
DeleteAgentsSessionRequest: - additionalProperties: false - properties: - agent_id: - type: string - session_id: - type: string - required: - - agent_id - - session_id - type: object EfficiencyConfig: additionalProperties: false properties: @@ -872,22 +844,6 @@ components: - scoring_functions - task_config type: object - GetAgentsSessionRequest: - additionalProperties: false - properties: - turn_ids: - items: - type: string - type: array - type: object - GetSpanTreeRequest: - additionalProperties: false - properties: - attributes_to_return: - items: - type: string - type: array - type: object GraphMemoryBank: additionalProperties: false properties: @@ -922,6 +878,16 @@ components: required: - memory_bank_type type: object + GreedySamplingStrategy: + additionalProperties: false + properties: + type: + const: greedy + default: greedy + type: string + required: + - type + type: object HealthInfo: additionalProperties: false properties: @@ -1045,8 +1011,8 @@ components: task_id: type: string required: - - task_id - job_id + - task_id type: object JobStatus: enum: @@ -1146,11 +1112,111 @@ components: - type - judge_model type: object - ListRuntimeToolsRequest: + ListDatasetsResponse: additionalProperties: false properties: - mcp_endpoint: - $ref: '#/components/schemas/URL' + data: + items: + $ref: '#/components/schemas/Dataset' + type: array + required: + - data + type: object + ListEvalTasksResponse: + additionalProperties: false + properties: + data: + items: + $ref: '#/components/schemas/EvalTask' + type: array + required: + - data + type: object + ListMemoryBanksResponse: + additionalProperties: false + properties: + data: + items: + $ref: '#/components/schemas/MemoryBank' + type: array + required: + - data + type: object + ListModelsResponse: + additionalProperties: false + properties: + data: + items: + $ref: '#/components/schemas/Model' + type: array + required: + - data + type: object + ListPostTrainingJobsResponse: + additionalProperties: false + properties: + 
data: + items: + additionalProperties: false + properties: + job_uuid: + type: string + required: + - job_uuid + type: object + type: array + required: + - data + type: object + ListProvidersResponse: + additionalProperties: false + properties: + data: + items: + $ref: '#/components/schemas/ProviderInfo' + type: array + required: + - data + type: object + ListScoringFunctionsResponse: + additionalProperties: false + properties: + data: + items: + $ref: '#/components/schemas/ScoringFn' + type: array + required: + - data + type: object + ListShieldsResponse: + additionalProperties: false + properties: + data: + items: + $ref: '#/components/schemas/Shield' + type: array + required: + - data + type: object + ListToolGroupsResponse: + additionalProperties: false + properties: + data: + items: + $ref: '#/components/schemas/ToolGroup' + type: array + required: + - data + type: object + ListToolsResponse: + additionalProperties: false + properties: + data: + items: + $ref: '#/components/schemas/Tool' + type: array + required: + - data type: object LogEventRequest: additionalProperties: false @@ -1715,6 +1781,20 @@ components: - chunks - scores type: object + QuerySpanTreeRequest: + additionalProperties: false + properties: + attributes_to_return: + items: + type: string + type: array + max_depth: + type: integer + span_id: + type: string + required: + - span_id + type: object QuerySpansRequest: additionalProperties: false properties: @@ -2069,26 +2149,13 @@ components: default: 1.0 type: number strategy: - $ref: '#/components/schemas/SamplingStrategy' - default: greedy - temperature: - default: 0.0 - type: number - top_k: - default: 0 - type: integer - top_p: - default: 0.95 - type: number + oneOf: + - $ref: '#/components/schemas/GreedySamplingStrategy' + - $ref: '#/components/schemas/TopPSamplingStrategy' + - $ref: '#/components/schemas/TopKSamplingStrategy' required: - strategy type: object - SamplingStrategy: - enum: - - greedy - - top_p - - top_k - type: string 
SaveSpansToDatasetRequest: additionalProperties: false properties: @@ -2945,6 +3012,34 @@ components: - tool_name - content type: object + TopKSamplingStrategy: + additionalProperties: false + properties: + top_k: + type: integer + type: + const: top_k + default: top_k + type: string + required: + - type + - top_k + type: object + TopPSamplingStrategy: + additionalProperties: false + properties: + temperature: + type: number + top_p: + default: 0.95 + type: number + type: + const: top_p + default: top_p + type: string + required: + - type + type: object Trace: additionalProperties: false properties: @@ -3057,38 +3152,6 @@ components: required: - uri type: object - UnregisterDatasetRequest: - additionalProperties: false - properties: - dataset_id: - type: string - required: - - dataset_id - type: object - UnregisterMemoryBankRequest: - additionalProperties: false - properties: - memory_bank_id: - type: string - required: - - memory_bank_id - type: object - UnregisterModelRequest: - additionalProperties: false - properties: - model_id: - type: string - required: - - model_id - type: object - UnregisterToolGroupRequest: - additionalProperties: false - properties: - tool_group_id: - type: string - required: - - tool_group_id - type: object UnstructuredLogEvent: additionalProperties: false properties: @@ -3216,7 +3279,7 @@ info: jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema openapi: 3.1.0 paths: - /v1/agents/create: + /v1/agents: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3248,9 +3311,14 @@ paths: description: OK tags: - Agents - /v1/agents/delete: - post: + /v1/agents/{agent_id}: + delete: parameters: + - in: path + name: agent_id + required: true + schema: + type: string - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -3265,20 +3333,19 @@ paths: required: false schema: type: string - requestBody: - content: - 
application/json: - schema: - $ref: '#/components/schemas/DeleteAgentsRequest' - required: true responses: '200': description: OK tags: - Agents - /v1/agents/session/create: + /v1/agents/{agent_id}/session: post: parameters: + - in: path + name: agent_id + required: true + schema: + type: string - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -3308,9 +3375,19 @@ paths: description: OK tags: - Agents - /v1/agents/session/delete: - post: + /v1/agents/{agent_id}/session/{session_id}: + delete: parameters: + - in: path + name: session_id + required: true + schema: + type: string + - in: path + name: agent_id + required: true + schema: + type: string - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -3325,30 +3402,30 @@ paths: required: false schema: type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/DeleteAgentsSessionRequest' - required: true responses: '200': description: OK tags: - Agents - /v1/agents/session/get: - post: + get: parameters: - - in: query + - in: path + name: session_id + required: true + schema: + type: string + - in: path name: agent_id required: true schema: type: string - in: query - name: session_id - required: true + name: turn_ids + required: false schema: - type: string + items: + type: string + type: array - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -3363,12 +3440,6 @@ paths: required: false schema: type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/GetAgentsSessionRequest' - required: true responses: '200': content: @@ -3378,55 +3449,19 @@ paths: description: OK tags: - Agents - /v1/agents/step/get: - get: + /v1/agents/{agent_id}/session/{session_id}/turn: + post: parameters: - - in: query + - in: path name: agent_id required: true 
schema: type: string - - in: query + - in: path name: session_id required: true schema: type: string - - in: query - name: turn_id - required: true - schema: - type: string - - in: query - name: step_id - required: true - schema: - type: string - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. - in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/AgentStepResponse' - description: OK - tags: - - Agents - /v1/agents/turn/create: - post: - parameters: - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -3459,20 +3494,20 @@ paths: streamed agent turn completion response. 
tags: - Agents - /v1/agents/turn/get: + /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}: get: parameters: - - in: query + - in: path name: agent_id required: true schema: type: string - - in: query + - in: path name: session_id required: true schema: type: string - - in: query + - in: path name: turn_id required: true schema: @@ -3500,6 +3535,52 @@ paths: description: OK tags: - Agents + /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}: + get: + parameters: + - in: path + name: agent_id + required: true + schema: + type: string + - in: path + name: session_id + required: true + schema: + type: string + - in: path + name: turn_id + required: true + schema: + type: string + - in: path + name: step_id + required: true + schema: + type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + responses: + '200': + content: + application/json: + schema: + $ref: '#/components/schemas/AgentStepResponse' + description: OK + tags: + - Agents /v1/batch-inference/chat-completion: post: parameters: @@ -3564,35 +3645,7 @@ paths: description: OK tags: - BatchInference (Coming Soon) - /v1/datasetio/append-rows: - post: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. 
- in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/AppendRowsRequest' - required: true - responses: - '200': - description: OK - tags: - - DatasetIO - /v1/datasetio/get-rows-paginated: + /v1/datasetio/rows: get: parameters: - in: query @@ -3638,10 +3691,116 @@ paths: description: OK tags: - DatasetIO - /v1/datasets/get: + post: + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/AppendRowsRequest' + required: true + responses: + '200': + description: OK + tags: + - DatasetIO + /v1/datasets: get: parameters: - - in: query + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + responses: + '200': + content: + application/json: + schema: + $ref: '#/components/schemas/ListDatasetsResponse' + description: OK + tags: + - Datasets + post: + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. 
This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterDatasetRequest' + required: true + responses: + '200': + description: OK + tags: + - Datasets + /v1/datasets/{dataset_id}: + delete: + parameters: + - in: path + name: dataset_id + required: true + schema: + type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + responses: + '200': + description: OK + tags: + - Datasets + get: + parameters: + - in: path name: dataset_id required: true schema: @@ -3671,7 +3830,7 @@ paths: description: OK tags: - Datasets - /v1/datasets/list: + /v1/eval-tasks: get: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3691,13 +3850,12 @@ paths: responses: '200': content: - application/jsonl: + application/json: schema: - $ref: '#/components/schemas/Dataset' + $ref: '#/components/schemas/ListEvalTasksResponse' description: OK tags: - - Datasets - /v1/datasets/register: + - EvalTasks post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3718,46 +3876,18 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RegisterDatasetRequest' + $ref: '#/components/schemas/RegisterEvalTaskRequest' required: true responses: '200': description: OK tags: - - Datasets - /v1/datasets/unregister: - post: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: 
header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. - in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/UnregisterDatasetRequest' - required: true - responses: - '200': - description: OK - tags: - - Datasets - /v1/eval-tasks/get: + - EvalTasks + /v1/eval-tasks/{eval_task_id}: get: parameters: - - in: query - name: name + - in: path + name: eval_task_id required: true schema: type: string @@ -3786,60 +3916,6 @@ paths: description: OK tags: - EvalTasks - /v1/eval-tasks/list: - get: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. - in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/EvalTask' - description: OK - tags: - - EvalTasks - /v1/eval-tasks/register: - post: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. 
- in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterEvalTaskRequest' - required: true - responses: - '200': - description: OK - tags: - - EvalTasks /v1/eval/evaluate-rows: post: parameters: @@ -3872,7 +3948,7 @@ paths: description: OK tags: - Eval - /v1/eval/job/cancel: + /v1/eval/jobs/cancel: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3900,55 +3976,19 @@ paths: description: OK tags: - Eval - /v1/eval/job/result: + /v1/eval/jobs/{job_id}: get: parameters: - - in: query - name: task_id - required: true - schema: - type: string - - in: query + - in: path name: job_id required: true schema: type: string - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. 
- in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluateResponse' - description: OK - tags: - - Eval - /v1/eval/job/status: - get: - parameters: - in: query name: task_id required: true schema: type: string - - in: query - name: job_id - required: true - schema: - type: string - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -3974,7 +4014,43 @@ paths: description: OK tags: - Eval - /v1/eval/run-eval: + /v1/eval/jobs/{job_id}/result: + get: + parameters: + - in: path + name: job_id + required: true + schema: + type: string + - in: query + name: task_id + required: true + schema: + type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. 
+ in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + responses: + '200': + content: + application/json: + schema: + $ref: '#/components/schemas/EvaluateResponse' + description: OK + tags: + - Eval + /v1/eval/run: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4132,14 +4208,9 @@ paths: description: OK tags: - Inference - /v1/memory-banks/get: + /v1/memory-banks: get: parameters: - - in: query - name: memory_bank_id - required: true - schema: - type: string - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -4159,39 +4230,10 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/MemoryBank' - - type: 'null' + $ref: '#/components/schemas/ListMemoryBanksResponse' description: OK tags: - MemoryBanks - /v1/memory-banks/list: - get: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. 
- in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/MemoryBank' - description: OK - tags: - - MemoryBanks - /v1/memory-banks/register: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4227,9 +4269,14 @@ paths: description: '' tags: - MemoryBanks - /v1/memory-banks/unregister: - post: + /v1/memory-banks/{memory_bank_id}: + delete: parameters: + - in: path + name: memory_bank_id + required: true + schema: + type: string - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -4244,17 +4291,43 @@ paths: required: false schema: type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/UnregisterMemoryBankRequest' - required: true responses: '200': description: OK tags: - MemoryBanks + get: + parameters: + - in: path + name: memory_bank_id + required: true + schema: + type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. 
+ in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + responses: + '200': + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/MemoryBank' + - type: 'null' + description: OK + tags: + - MemoryBanks /v1/memory/insert: post: parameters: @@ -4315,14 +4388,9 @@ paths: description: OK tags: - Memory - /v1/models/get: + /v1/models: get: parameters: - - in: query - name: identifier - required: true - schema: - type: string - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -4342,39 +4410,10 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/Model' - - type: 'null' + $ref: '#/components/schemas/ListModelsResponse' description: OK tags: - Models - /v1/models/list: - get: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. 
- in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/Model' - description: OK - tags: - - Models - /v1/models/register: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4406,9 +4445,14 @@ paths: description: OK tags: - Models - /v1/models/unregister: - post: + /v1/models/{model_id}: + delete: parameters: + - in: path + name: model_id + required: true + schema: + type: string - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -4423,17 +4467,43 @@ paths: required: false schema: type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/UnregisterModelRequest' - required: true responses: '200': description: OK tags: - Models + get: + parameters: + - in: path + name: model_id + required: true + schema: + type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. 
+ in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + responses: + '200': + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/Model' + - type: 'null' + description: OK + tags: + - Models /v1/post-training/job/artifacts: get: parameters: @@ -4548,9 +4618,9 @@ paths: responses: '200': content: - application/jsonl: + application/json: schema: - $ref: '#/components/schemas/PostTrainingJob' + $ref: '#/components/schemas/ListPostTrainingJobsResponse' description: OK tags: - PostTraining (Coming Soon) @@ -4640,9 +4710,7 @@ paths: content: application/json: schema: - additionalProperties: - $ref: '#/components/schemas/ProviderInfo' - type: object + $ref: '#/components/schemas/ListProvidersResponse' description: OK tags: - Inspect @@ -4708,10 +4776,63 @@ paths: description: OK tags: - Safety - /v1/scoring-functions/get: + /v1/scoring-functions: get: parameters: - - in: query + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + responses: + '200': + content: + application/json: + schema: + $ref: '#/components/schemas/ListScoringFunctionsResponse' + description: OK + tags: + - ScoringFunctions + post: + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. 
+ in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterScoringFunctionRequest' + required: true + responses: + '200': + description: OK + tags: + - ScoringFunctions + /v1/scoring-functions/{scoring_fn_id}: + get: + parameters: + - in: path name: scoring_fn_id required: true schema: @@ -4741,60 +4862,6 @@ paths: description: OK tags: - ScoringFunctions - /v1/scoring-functions/list: - get: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. - in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/ScoringFn' - description: OK - tags: - - ScoringFunctions - /v1/scoring-functions/register: - post: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. 
- in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterScoringFunctionRequest' - required: true - responses: - '200': - description: OK - tags: - - ScoringFunctions /v1/scoring/score: post: parameters: @@ -4859,14 +4926,9 @@ paths: description: OK tags: - Scoring - /v1/shields/get: + /v1/shields: get: parameters: - - in: query - name: identifier - required: true - schema: - type: string - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -4886,39 +4948,10 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/Shield' - - type: 'null' + $ref: '#/components/schemas/ListShieldsResponse' description: OK tags: - Shields - /v1/shields/list: - get: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. 
- in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/Shield' - description: OK - tags: - - Shields - /v1/shields/register: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -4950,6 +4983,39 @@ paths: description: OK tags: - Shields + /v1/shields/{identifier}: + get: + parameters: + - in: path + name: identifier + required: true + schema: + type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + responses: + '200': + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/Shield' + - type: 'null' + description: OK + tags: + - Shields /v1/synthetic-data-generation/generate: post: parameters: @@ -4982,50 +5048,6 @@ paths: description: OK tags: - SyntheticDataGeneration (Coming Soon) - /v1/telemetry/get-span-tree: - post: - parameters: - - in: query - name: span_id - required: true - schema: - type: string - - in: query - name: max_depth - required: false - schema: - type: integer - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. 
- in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/GetSpanTreeRequest' - required: true - responses: - '200': - content: - application/json: - schema: - additionalProperties: - $ref: '#/components/schemas/SpanWithStatus' - type: object - description: OK - tags: - - Telemetry /v1/telemetry/log-event: post: parameters: @@ -5054,6 +5076,40 @@ paths: description: OK tags: - Telemetry + /v1/telemetry/query-span-tree: + post: + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/QuerySpanTreeRequest' + required: true + responses: + '200': + content: + application/json: + schema: + additionalProperties: + $ref: '#/components/schemas/SpanWithStatus' + type: object + description: OK + tags: + - Telemetry /v1/telemetry/query-spans: post: parameters: @@ -5180,13 +5236,18 @@ paths: tags: - ToolRuntime /v1/tool-runtime/list-tools: - post: + get: parameters: - in: query name: tool_group_id required: false schema: type: string + - in: query + name: mcp_endpoint + required: false + schema: + $ref: '#/components/schemas/URL' - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -5201,12 +5262,6 @@ paths: required: false schema: type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ListRuntimeToolsRequest' - required: true responses: '200': content: @@ -5216,14 +5271,9 @@ paths: description: OK 
tags: - ToolRuntime - /v1/toolgroups/get: + /v1/toolgroups: get: parameters: - - in: query - name: toolgroup_id - required: true - schema: - type: string - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -5243,38 +5293,11 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ToolGroup' - description: OK - tags: - - ToolGroups - /v1/toolgroups/list: - get: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. - in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/ToolGroup' + $ref: '#/components/schemas/ListToolGroupsResponse' description: OK summary: List tool groups with optional provider tags: - ToolGroups - /v1/toolgroups/register: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -5303,9 +5326,14 @@ paths: summary: Register a tool group tags: - ToolGroups - /v1/toolgroups/unregister: - post: + /v1/toolgroups/{toolgroup_id}: + delete: parameters: + - in: path + name: toolgroup_id + required: true + schema: + type: string - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -5320,22 +5348,78 @@ paths: required: false schema: type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/UnregisterToolGroupRequest' - required: true responses: '200': description: OK summary: Unregister a tool group tags: - ToolGroups - /v1/tools/get: + get: + parameters: + - in: path + name: toolgroup_id + required: true + schema: + type: 
string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + responses: + '200': + content: + application/json: + schema: + $ref: '#/components/schemas/ToolGroup' + description: OK + tags: + - ToolGroups + /v1/tools: get: parameters: - in: query + name: toolgroup_id + required: false + schema: + type: string + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-Provider-Data + required: false + schema: + type: string + - description: Version of the client making the request. This is used to ensure + that the client and server are compatible. + in: header + name: X-LlamaStack-Client-Version + required: false + schema: + type: string + responses: + '200': + content: + application/json: + schema: + $ref: '#/components/schemas/ListToolsResponse' + description: OK + summary: List tools with optional tool group + tags: + - ToolGroups + /v1/tools/{tool_name}: + get: + parameters: + - in: path name: tool_name required: true schema: @@ -5363,38 +5447,6 @@ paths: description: OK tags: - ToolGroups - /v1/tools/list: - get: - parameters: - - in: query - name: tool_group_id - required: false - schema: - type: string - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-Provider-Data - required: false - schema: - type: string - - description: Version of the client making the request. This is used to ensure - that the client and server are compatible. 
- in: header - name: X-LlamaStack-Client-Version - required: false - schema: - type: string - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/Tool' - description: OK - summary: List tools with optional tool group - tags: - - ToolGroups /v1/version: get: parameters: @@ -5568,12 +5620,6 @@ tags: name: DatasetFormat - name: DatasetIO - name: Datasets -- description: - name: DeleteAgentsRequest -- description: - name: DeleteAgentsSessionRequest - description: name: EfficiencyConfig @@ -5593,18 +5639,15 @@ tags: - description: name: EvaluateRowsRequest -- description: - name: GetAgentsSessionRequest -- description: - name: GetSpanTreeRequest - description: name: GraphMemoryBank - description: name: GraphMemoryBankParams +- description: + name: GreedySamplingStrategy - description: name: HealthInfo - description: name: LLMAsJudgeScoringFnParams -- description: - name: ListRuntimeToolsRequest + name: ListDatasetsResponse +- description: + name: ListEvalTasksResponse +- description: + name: ListMemoryBanksResponse +- description: + name: ListModelsResponse +- description: + name: ListPostTrainingJobsResponse +- description: + name: ListProvidersResponse +- description: + name: ListScoringFunctionsResponse +- description: + name: ListShieldsResponse +- description: + name: ListToolGroupsResponse +- description: + name: ListToolsResponse - description: name: LogEventRequest @@ -5727,6 +5797,9 @@ tags: - description: name: QueryDocumentsResponse +- description: + name: QuerySpanTreeRequest - description: name: QuerySpansRequest @@ -5775,9 +5848,6 @@ tags: name: SafetyViolation - description: name: SamplingParams -- description: - name: SamplingStrategy - description: name: SaveSpansToDatasetRequest @@ -5898,6 +5968,12 @@ tags: /> name: ToolResponseMessage - name: ToolRuntime +- description: + name: TopKSamplingStrategy +- description: + name: TopPSamplingStrategy - description: name: Trace - description: @@ -5909,18 +5985,6 @@ tags: 
name: Turn - description: name: URL -- description: - name: UnregisterDatasetRequest -- description: - name: UnregisterMemoryBankRequest -- description: - name: UnregisterModelRequest -- description: - name: UnregisterToolGroupRequest - description: name: UnstructuredLogEvent @@ -6003,18 +6067,15 @@ x-tagGroups: - DataConfig - Dataset - DatasetFormat - - DeleteAgentsRequest - - DeleteAgentsSessionRequest - EfficiencyConfig - EmbeddingsRequest - EmbeddingsResponse - EvalTask - EvaluateResponse - EvaluateRowsRequest - - GetAgentsSessionRequest - - GetSpanTreeRequest - GraphMemoryBank - GraphMemoryBankParams + - GreedySamplingStrategy - HealthInfo - ImageContentItem - ImageDelta @@ -6031,7 +6092,16 @@ x-tagGroups: - KeywordMemoryBank - KeywordMemoryBankParams - LLMAsJudgeScoringFnParams - - ListRuntimeToolsRequest + - ListDatasetsResponse + - ListEvalTasksResponse + - ListMemoryBanksResponse + - ListModelsResponse + - ListPostTrainingJobsResponse + - ListProvidersResponse + - ListScoringFunctionsResponse + - ListShieldsResponse + - ListToolGroupsResponse + - ListToolsResponse - LogEventRequest - LogSeverity - LoraFinetuningConfig @@ -6057,6 +6127,7 @@ x-tagGroups: - QueryConditionOp - QueryDocumentsRequest - QueryDocumentsResponse + - QuerySpanTreeRequest - QuerySpansRequest - QueryTracesRequest - RegexParserScoringFnParams @@ -6074,7 +6145,6 @@ x-tagGroups: - RunShieldResponse - SafetyViolation - SamplingParams - - SamplingStrategy - SaveSpansToDatasetRequest - ScoreBatchRequest - ScoreBatchResponse @@ -6115,14 +6185,12 @@ x-tagGroups: - ToolPromptFormat - ToolResponse - ToolResponseMessage + - TopKSamplingStrategy + - TopPSamplingStrategy - Trace - TrainingConfig - Turn - URL - - UnregisterDatasetRequest - - UnregisterMemoryBankRequest - - UnregisterModelRequest - - UnregisterToolGroupRequest - UnstructuredLogEvent - UserMessage - VectorMemoryBank diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index c3f3d21f0..63d0920fb 100644 --- 
a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -7,6 +7,7 @@ from datetime import datetime from enum import Enum from typing import ( + Annotated, Any, AsyncIterator, Dict, @@ -20,7 +21,6 @@ from typing import ( from llama_models.schema_utils import json_schema_type, register_schema, webmethod from pydantic import BaseModel, ConfigDict, Field -from typing_extensions import Annotated from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, URL from llama_stack.apis.inference import ( @@ -296,13 +296,13 @@ class AgentStepResponse(BaseModel): @runtime_checkable @trace_protocol class Agents(Protocol): - @webmethod(route="/agents/create") + @webmethod(route="/agents", method="POST") async def create_agent( self, agent_config: AgentConfig, ) -> AgentCreateResponse: ... - @webmethod(route="/agents/turn/create") + @webmethod(route="/agents/{agent_id}/session/{session_id}/turn", method="POST") async def create_agent_turn( self, agent_id: str, @@ -318,36 +318,52 @@ class Agents(Protocol): toolgroups: Optional[List[AgentToolGroup]] = None, ) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ... - @webmethod(route="/agents/turn/get") + @webmethod( + route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}", method="GET" + ) async def get_agents_turn( - self, agent_id: str, session_id: str, turn_id: str + self, + agent_id: str, + session_id: str, + turn_id: str, ) -> Turn: ... - @webmethod(route="/agents/step/get") + @webmethod( + route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}", + method="GET", + ) async def get_agents_step( - self, agent_id: str, session_id: str, turn_id: str, step_id: str + self, + agent_id: str, + session_id: str, + turn_id: str, + step_id: str, ) -> AgentStepResponse: ... 
- @webmethod(route="/agents/session/create") + @webmethod(route="/agents/{agent_id}/session", method="POST") async def create_agent_session( self, agent_id: str, session_name: str, ) -> AgentSessionCreateResponse: ... - @webmethod(route="/agents/session/get") + @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET") async def get_agents_session( self, - agent_id: str, session_id: str, + agent_id: str, turn_ids: Optional[List[str]] = None, ) -> Session: ... - @webmethod(route="/agents/session/delete") - async def delete_agents_session(self, agent_id: str, session_id: str) -> None: ... + @webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE") + async def delete_agents_session( + self, + session_id: str, + agent_id: str, + ) -> None: ... - @webmethod(route="/agents/delete") - async def delete_agents( + @webmethod(route="/agents/{agent_id}", method="DELETE") + async def delete_agent( self, agent_id: str, ) -> None: ... diff --git a/llama_stack/apis/batch_inference/batch_inference.py b/llama_stack/apis/batch_inference/batch_inference.py index 81826a7b1..ca5ba059f 100644 --- a/llama_stack/apis/batch_inference/batch_inference.py +++ b/llama_stack/apis/batch_inference/batch_inference.py @@ -54,7 +54,7 @@ class BatchChatCompletionResponse(BaseModel): @runtime_checkable class BatchInference(Protocol): - @webmethod(route="/batch-inference/completion") + @webmethod(route="/batch-inference/completion", method="POST") async def batch_completion( self, model: str, @@ -63,7 +63,7 @@ class BatchInference(Protocol): logprobs: Optional[LogProbConfig] = None, ) -> BatchCompletionResponse: ... 
- @webmethod(route="/batch-inference/chat-completion") + @webmethod(route="/batch-inference/chat-completion", method="POST") async def batch_chat_completion( self, model: str, diff --git a/llama_stack/apis/datasetio/datasetio.py b/llama_stack/apis/datasetio/datasetio.py index 983e0e4ea..8b4c25a1d 100644 --- a/llama_stack/apis/datasetio/datasetio.py +++ b/llama_stack/apis/datasetio/datasetio.py @@ -29,7 +29,7 @@ class DatasetIO(Protocol): # keeping for aligning with inference/safety, but this is not used dataset_store: DatasetStore - @webmethod(route="/datasetio/get-rows-paginated", method="GET") + @webmethod(route="/datasetio/rows", method="GET") async def get_rows_paginated( self, dataset_id: str, @@ -38,7 +38,7 @@ class DatasetIO(Protocol): filter_condition: Optional[str] = None, ) -> PaginatedRowsResult: ... - @webmethod(route="/datasetio/append-rows", method="POST") + @webmethod(route="/datasetio/rows", method="POST") async def append_rows( self, dataset_id: str, rows: List[Dict[str, Any]] ) -> None: ... 
diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index 7afc0f8fd..5ad5bdcdb 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -7,11 +7,9 @@ from typing import Any, Dict, List, Literal, Optional, Protocol from llama_models.schema_utils import json_schema_type, webmethod - from pydantic import BaseModel, Field from llama_stack.apis.common.content_types import URL - from llama_stack.apis.common.type_system import ParamType from llama_stack.apis.resource import Resource, ResourceType @@ -44,8 +42,12 @@ class DatasetInput(CommonDatasetFields, BaseModel): provider_dataset_id: Optional[str] = None +class ListDatasetsResponse(BaseModel): + data: List[Dataset] + + class Datasets(Protocol): - @webmethod(route="/datasets/register", method="POST") + @webmethod(route="/datasets", method="POST") async def register_dataset( self, dataset_id: str, @@ -56,16 +58,16 @@ class Datasets(Protocol): metadata: Optional[Dict[str, Any]] = None, ) -> None: ... - @webmethod(route="/datasets/get", method="GET") + @webmethod(route="/datasets/{dataset_id}", method="GET") async def get_dataset( self, dataset_id: str, ) -> Optional[Dataset]: ... - @webmethod(route="/datasets/list", method="GET") - async def list_datasets(self) -> List[Dataset]: ... + @webmethod(route="/datasets", method="GET") + async def list_datasets(self) -> ListDatasetsResponse: ... 
- @webmethod(route="/datasets/unregister", method="POST") + @webmethod(route="/datasets/{dataset_id}", method="DELETE") async def unregister_dataset( self, dataset_id: str, diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py index 1073d6310..1b8f768ad 100644 --- a/llama_stack/apis/eval/eval.py +++ b/llama_stack/apis/eval/eval.py @@ -7,9 +7,7 @@ from typing import Any, Dict, List, Literal, Optional, Protocol, Union from llama_models.schema_utils import json_schema_type, webmethod - from pydantic import BaseModel, Field - from typing_extensions import Annotated from llama_stack.apis.agents import AgentConfig @@ -76,7 +74,7 @@ class EvaluateResponse(BaseModel): class Eval(Protocol): - @webmethod(route="/eval/run-eval", method="POST") + @webmethod(route="/eval/run", method="POST") async def run_eval( self, task_id: str, @@ -92,11 +90,11 @@ class Eval(Protocol): task_config: EvalTaskConfig, ) -> EvaluateResponse: ... - @webmethod(route="/eval/job/status", method="GET") - async def job_status(self, task_id: str, job_id: str) -> Optional[JobStatus]: ... + @webmethod(route="/eval/jobs/{job_id}", method="GET") + async def job_status(self, job_id: str, task_id: str) -> Optional[JobStatus]: ... - @webmethod(route="/eval/job/cancel", method="POST") - async def job_cancel(self, task_id: str, job_id: str) -> None: ... + @webmethod(route="/eval/jobs/cancel", method="POST") + async def job_cancel(self, job_id: str, task_id: str) -> None: ... - @webmethod(route="/eval/job/result", method="GET") - async def job_result(self, task_id: str, job_id: str) -> EvaluateResponse: ... + @webmethod(route="/eval/jobs/{job_id}/result", method="GET") + async def job_result(self, job_id: str, task_id: str) -> EvaluateResponse: ... 
diff --git a/llama_stack/apis/eval_tasks/eval_tasks.py b/llama_stack/apis/eval_tasks/eval_tasks.py index 083681289..a0a533055 100644 --- a/llama_stack/apis/eval_tasks/eval_tasks.py +++ b/llama_stack/apis/eval_tasks/eval_tasks.py @@ -6,7 +6,6 @@ from typing import Any, Dict, List, Literal, Optional, Protocol, runtime_checkable from llama_models.schema_utils import json_schema_type, webmethod - from pydantic import BaseModel, Field from llama_stack.apis.resource import Resource, ResourceType @@ -40,15 +39,22 @@ class EvalTaskInput(CommonEvalTaskFields, BaseModel): provider_eval_task_id: Optional[str] = None +class ListEvalTasksResponse(BaseModel): + data: List[EvalTask] + + @runtime_checkable class EvalTasks(Protocol): - @webmethod(route="/eval-tasks/list", method="GET") - async def list_eval_tasks(self) -> List[EvalTask]: ... + @webmethod(route="/eval-tasks", method="GET") + async def list_eval_tasks(self) -> ListEvalTasksResponse: ... - @webmethod(route="/eval-tasks/get", method="GET") - async def get_eval_task(self, name: str) -> Optional[EvalTask]: ... + @webmethod(route="/eval-tasks/{eval_task_id}", method="GET") + async def get_eval_task( + self, + eval_task_id: str, + ) -> Optional[EvalTask]: ... 
- @webmethod(route="/eval-tasks/register", method="POST") + @webmethod(route="/eval-tasks", method="POST") async def register_eval_task( self, eval_task_id: str, diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index b525aa331..fdda5fe1b 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -291,7 +291,7 @@ class ModelStore(Protocol): class Inference(Protocol): model_store: ModelStore - @webmethod(route="/inference/completion") + @webmethod(route="/inference/completion", method="POST") async def completion( self, model_id: str, @@ -302,7 +302,7 @@ class Inference(Protocol): logprobs: Optional[LogProbConfig] = None, ) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]: ... - @webmethod(route="/inference/chat-completion") + @webmethod(route="/inference/chat-completion", method="POST") async def chat_completion( self, model_id: str, @@ -319,7 +319,7 @@ class Inference(Protocol): ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk] ]: ... - @webmethod(route="/inference/embeddings") + @webmethod(route="/inference/embeddings", method="POST") async def embeddings( self, model_id: str, diff --git a/llama_stack/apis/inspect/inspect.py b/llama_stack/apis/inspect/inspect.py index 699bce7b7..e2bb98217 100644 --- a/llama_stack/apis/inspect/inspect.py +++ b/llama_stack/apis/inspect/inspect.py @@ -34,10 +34,14 @@ class VersionInfo(BaseModel): version: str +class ListProvidersResponse(BaseModel): + data: List[ProviderInfo] + + @runtime_checkable class Inspect(Protocol): @webmethod(route="/providers/list", method="GET") - async def list_providers(self) -> Dict[str, ProviderInfo]: ... + async def list_providers(self) -> ListProvidersResponse: ... @webmethod(route="/routes/list", method="GET") async def list_routes(self) -> Dict[str, List[RouteInfo]]: ... 
diff --git a/llama_stack/apis/memory/memory.py b/llama_stack/apis/memory/memory.py index 8096a107a..6e6fcf697 100644 --- a/llama_stack/apis/memory/memory.py +++ b/llama_stack/apis/memory/memory.py @@ -50,7 +50,7 @@ class Memory(Protocol): # this will just block now until documents are inserted, but it should # probably return a Job instance which can be polled for completion - @webmethod(route="/memory/insert") + @webmethod(route="/memory/insert", method="POST") async def insert_documents( self, bank_id: str, @@ -58,7 +58,7 @@ class Memory(Protocol): ttl_seconds: Optional[int] = None, ) -> None: ... - @webmethod(route="/memory/query") + @webmethod(route="/memory/query", method="POST") async def query_documents( self, bank_id: str, diff --git a/llama_stack/apis/memory_banks/memory_banks.py b/llama_stack/apis/memory_banks/memory_banks.py index 21569beff..ec8ba824b 100644 --- a/llama_stack/apis/memory_banks/memory_banks.py +++ b/llama_stack/apis/memory_banks/memory_banks.py @@ -16,7 +16,6 @@ from typing import ( ) from llama_models.schema_utils import json_schema_type, register_schema, webmethod - from pydantic import BaseModel, Field from llama_stack.apis.resource import Resource, ResourceType @@ -133,16 +132,23 @@ class MemoryBankInput(BaseModel): provider_memory_bank_id: Optional[str] = None +class ListMemoryBanksResponse(BaseModel): + data: List[MemoryBank] + + @runtime_checkable @trace_protocol class MemoryBanks(Protocol): - @webmethod(route="/memory-banks/list", method="GET") - async def list_memory_banks(self) -> List[MemoryBank]: ... + @webmethod(route="/memory-banks", method="GET") + async def list_memory_banks(self) -> ListMemoryBanksResponse: ... - @webmethod(route="/memory-banks/get", method="GET") - async def get_memory_bank(self, memory_bank_id: str) -> Optional[MemoryBank]: ... + @webmethod(route="/memory-banks/{memory_bank_id}", method="GET") + async def get_memory_bank( + self, + memory_bank_id: str, + ) -> Optional[MemoryBank]: ... 
- @webmethod(route="/memory-banks/register", method="POST") + @webmethod(route="/memory-banks", method="POST") async def register_memory_bank( self, memory_bank_id: str, @@ -151,5 +157,5 @@ class MemoryBanks(Protocol): provider_memory_bank_id: Optional[str] = None, ) -> MemoryBank: ... - @webmethod(route="/memory-banks/unregister", method="POST") + @webmethod(route="/memory-banks/{memory_bank_id}", method="DELETE") async def unregister_memory_bank(self, memory_bank_id: str) -> None: ... diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index 0ee23ecc1..3361c2836 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -52,16 +52,23 @@ class ModelInput(CommonModelFields): model_config = ConfigDict(protected_namespaces=()) +class ListModelsResponse(BaseModel): + data: List[Model] + + @runtime_checkable @trace_protocol class Models(Protocol): - @webmethod(route="/models/list", method="GET") - async def list_models(self) -> List[Model]: ... + @webmethod(route="/models", method="GET") + async def list_models(self) -> ListModelsResponse: ... - @webmethod(route="/models/get", method="GET") - async def get_model(self, identifier: str) -> Optional[Model]: ... + @webmethod(route="/models/{model_id}", method="GET") + async def get_model( + self, + model_id: str, + ) -> Optional[Model]: ... - @webmethod(route="/models/register", method="POST") + @webmethod(route="/models", method="POST") async def register_model( self, model_id: str, @@ -71,5 +78,8 @@ class Models(Protocol): model_type: Optional[ModelType] = None, ) -> Model: ... - @webmethod(route="/models/unregister", method="POST") - async def unregister_model(self, model_id: str) -> None: ... + @webmethod(route="/models/{model_id}", method="DELETE") + async def unregister_model( + self, + model_id: str, + ) -> None: ... 
diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py index 8841dc1d0..b9aa3bbde 100644 --- a/llama_stack/apis/post_training/post_training.py +++ b/llama_stack/apis/post_training/post_training.py @@ -6,16 +6,13 @@ from datetime import datetime from enum import Enum - from typing import Any, Dict, List, Literal, Optional, Protocol, Union from llama_models.schema_utils import json_schema_type, webmethod - from pydantic import BaseModel, Field from typing_extensions import Annotated from llama_stack.apis.common.content_types import URL - from llama_stack.apis.common.job_types import JobStatus from llama_stack.apis.common.training_types import Checkpoint @@ -159,6 +156,10 @@ class PostTrainingJobStatusResponse(BaseModel): checkpoints: List[Checkpoint] = Field(default_factory=list) +class ListPostTrainingJobsResponse(BaseModel): + data: List[PostTrainingJob] + + @json_schema_type class PostTrainingJobArtifactsResponse(BaseModel): """Artifacts of a finetuning job.""" @@ -197,7 +198,7 @@ class PostTraining(Protocol): ) -> PostTrainingJob: ... @webmethod(route="/post-training/jobs", method="GET") - async def get_training_jobs(self) -> List[PostTrainingJob]: ... + async def get_training_jobs(self) -> ListPostTrainingJobsResponse: ... 
@webmethod(route="/post-training/job/status", method="GET") async def get_training_job_status( diff --git a/llama_stack/apis/safety/safety.py b/llama_stack/apis/safety/safety.py index dd24642b1..513733d1e 100644 --- a/llama_stack/apis/safety/safety.py +++ b/llama_stack/apis/safety/safety.py @@ -12,7 +12,6 @@ from pydantic import BaseModel, Field from llama_stack.apis.inference import Message from llama_stack.apis.shields import Shield - from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol @@ -49,7 +48,7 @@ class ShieldStore(Protocol): class Safety(Protocol): shield_store: ShieldStore - @webmethod(route="/safety/run-shield") + @webmethod(route="/safety/run-shield", method="POST") async def run_shield( self, shield_id: str, diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py index 996291dcc..5bacaaf66 100644 --- a/llama_stack/apis/scoring/scoring.py +++ b/llama_stack/apis/scoring/scoring.py @@ -11,7 +11,6 @@ from pydantic import BaseModel from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams - # mapping of metric to value ScoringResultRow = Dict[str, Any] @@ -43,7 +42,7 @@ class ScoringFunctionStore(Protocol): class Scoring(Protocol): scoring_function_store: ScoringFunctionStore - @webmethod(route="/scoring/score-batch") + @webmethod(route="/scoring/score-batch", method="POST") async def score_batch( self, dataset_id: str, @@ -51,7 +50,7 @@ class Scoring(Protocol): save_results_dataset: bool = False, ) -> ScoreBatchResponse: ... 
- @webmethod(route="/scoring/score") + @webmethod(route="/scoring/score", method="POST") async def score( self, input_rows: List[Dict[str, Any]], diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/llama_stack/apis/scoring_functions/scoring_functions.py index fc57cfbbf..3089dc0a4 100644 --- a/llama_stack/apis/scoring_functions/scoring_functions.py +++ b/llama_stack/apis/scoring_functions/scoring_functions.py @@ -21,7 +21,6 @@ from pydantic import BaseModel, Field from typing_extensions import Annotated from llama_stack.apis.common.type_system import ParamType - from llama_stack.apis.resource import Resource, ResourceType @@ -129,15 +128,21 @@ class ScoringFnInput(CommonScoringFnFields, BaseModel): provider_scoring_fn_id: Optional[str] = None +class ListScoringFunctionsResponse(BaseModel): + data: List[ScoringFn] + + @runtime_checkable class ScoringFunctions(Protocol): - @webmethod(route="/scoring-functions/list", method="GET") - async def list_scoring_functions(self) -> List[ScoringFn]: ... + @webmethod(route="/scoring-functions", method="GET") + async def list_scoring_functions(self) -> ListScoringFunctionsResponse: ... - @webmethod(route="/scoring-functions/get", method="GET") - async def get_scoring_function(self, scoring_fn_id: str) -> Optional[ScoringFn]: ... + @webmethod(route="/scoring-functions/{scoring_fn_id}", method="GET") + async def get_scoring_function( + self, scoring_fn_id: str, / + ) -> Optional[ScoringFn]: ... 
- @webmethod(route="/scoring-functions/register", method="POST") + @webmethod(route="/scoring-functions", method="POST") async def register_scoring_function( self, scoring_fn_id: str, diff --git a/llama_stack/apis/shields/shields.py b/llama_stack/apis/shields/shields.py index 8d4d5f9fd..3dd685b14 100644 --- a/llama_stack/apis/shields/shields.py +++ b/llama_stack/apis/shields/shields.py @@ -38,16 +38,20 @@ class ShieldInput(CommonShieldFields): provider_shield_id: Optional[str] = None +class ListShieldsResponse(BaseModel): + data: List[Shield] + + @runtime_checkable @trace_protocol class Shields(Protocol): - @webmethod(route="/shields/list", method="GET") - async def list_shields(self) -> List[Shield]: ... + @webmethod(route="/shields", method="GET") + async def list_shields(self) -> ListShieldsResponse: ... - @webmethod(route="/shields/get", method="GET") + @webmethod(route="/shields/{identifier}", method="GET") async def get_shield(self, identifier: str) -> Optional[Shield]: ... - @webmethod(route="/shields/register", method="POST") + @webmethod(route="/shields", method="POST") async def register_shield( self, shield_id: str, diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py index 23a475bff..d04cb67e3 100644 --- a/llama_stack/apis/telemetry/telemetry.py +++ b/llama_stack/apis/telemetry/telemetry.py @@ -185,8 +185,8 @@ class Telemetry(Protocol): order_by: Optional[List[str]] = None, ) -> List[Trace]: ... 
- @webmethod(route="/telemetry/get-span-tree", method="POST") - async def get_span_tree( + @webmethod(route="/telemetry/query-span-tree", method="POST") + async def query_span_tree( self, span_id: str, attributes_to_return: Optional[List[str]] = None, diff --git a/llama_stack/apis/tools/tools.py b/llama_stack/apis/tools/tools.py index d2bdf9873..fb990cc41 100644 --- a/llama_stack/apis/tools/tools.py +++ b/llama_stack/apis/tools/tools.py @@ -74,13 +74,21 @@ class ToolInvocationResult(BaseModel): class ToolStore(Protocol): def get_tool(self, tool_name: str) -> Tool: ... - def get_tool_group(self, tool_group_id: str) -> ToolGroup: ... + def get_tool_group(self, toolgroup_id: str) -> ToolGroup: ... + + +class ListToolGroupsResponse(BaseModel): + data: List[ToolGroup] + + +class ListToolsResponse(BaseModel): + data: List[Tool] @runtime_checkable @trace_protocol class ToolGroups(Protocol): - @webmethod(route="/toolgroups/register", method="POST") + @webmethod(route="/toolgroups", method="POST") async def register_tool_group( self, toolgroup_id: str, @@ -91,27 +99,33 @@ class ToolGroups(Protocol): """Register a tool group""" ... - @webmethod(route="/toolgroups/get", method="GET") + @webmethod(route="/toolgroups/{toolgroup_id}", method="GET") async def get_tool_group( self, toolgroup_id: str, ) -> ToolGroup: ... - @webmethod(route="/toolgroups/list", method="GET") - async def list_tool_groups(self) -> List[ToolGroup]: + @webmethod(route="/toolgroups", method="GET") + async def list_tool_groups(self) -> ListToolGroupsResponse: """List tool groups with optional provider""" ... - @webmethod(route="/tools/list", method="GET") - async def list_tools(self, tool_group_id: Optional[str] = None) -> List[Tool]: + @webmethod(route="/tools", method="GET") + async def list_tools(self, toolgroup_id: Optional[str] = None) -> ListToolsResponse: """List tools with optional tool group""" ... 
- @webmethod(route="/tools/get", method="GET") - async def get_tool(self, tool_name: str) -> Tool: ... + @webmethod(route="/tools/{tool_name}", method="GET") + async def get_tool( + self, + tool_name: str, + ) -> Tool: ... - @webmethod(route="/toolgroups/unregister", method="POST") - async def unregister_tool_group(self, tool_group_id: str) -> None: + @webmethod(route="/toolgroups/{toolgroup_id}", method="DELETE") + async def unregister_toolgroup( + self, + toolgroup_id: str, + ) -> None: """Unregister a tool group""" ... diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index a3a64bf6b..e02606936 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -10,23 +10,32 @@ from pydantic import TypeAdapter from llama_stack.apis.common.content_types import URL from llama_stack.apis.common.type_system import ParamType -from llama_stack.apis.datasets import Dataset, Datasets -from llama_stack.apis.eval_tasks import EvalTask, EvalTasks +from llama_stack.apis.datasets import Dataset, Datasets, ListDatasetsResponse +from llama_stack.apis.eval_tasks import EvalTask, EvalTasks, ListEvalTasksResponse from llama_stack.apis.memory_banks import ( BankParams, + ListMemoryBanksResponse, MemoryBank, MemoryBanks, MemoryBankType, ) -from llama_stack.apis.models import Model, Models, ModelType +from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType from llama_stack.apis.resource import ResourceType from llama_stack.apis.scoring_functions import ( + ListScoringFunctionsResponse, ScoringFn, ScoringFnParams, ScoringFunctions, ) -from llama_stack.apis.shields import Shield, Shields -from llama_stack.apis.tools import Tool, ToolGroup, ToolGroups, ToolHost +from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields +from llama_stack.apis.tools import ( + ListToolGroupsResponse, + ListToolsResponse, + Tool, + ToolGroup, + 
ToolGroups, + ToolHost, +) from llama_stack.distribution.datatypes import ( RoutableObject, RoutableObjectWithProvider, @@ -215,11 +224,11 @@ class CommonRoutingTableImpl(RoutingTable): class ModelsRoutingTable(CommonRoutingTableImpl, Models): - async def list_models(self) -> List[Model]: - return await self.get_all_with_type("model") + async def list_models(self) -> ListModelsResponse: + return ListModelsResponse(data=await self.get_all_with_type("model")) - async def get_model(self, identifier: str) -> Optional[Model]: - return await self.get_object_by_identifier("model", identifier) + async def get_model(self, model_id: str) -> Optional[Model]: + return await self.get_object_by_identifier("model", model_id) async def register_model( self, @@ -265,8 +274,10 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models): class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): - async def list_shields(self) -> List[Shield]: - return await self.get_all_with_type(ResourceType.shield.value) + async def list_shields(self) -> ListShieldsResponse: + return ListShieldsResponse( + data=await self.get_all_with_type(ResourceType.shield.value) + ) async def get_shield(self, identifier: str) -> Optional[Shield]: return await self.get_object_by_identifier("shield", identifier) @@ -301,8 +312,8 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks): - async def list_memory_banks(self) -> List[MemoryBank]: - return await self.get_all_with_type(ResourceType.memory_bank.value) + async def list_memory_banks(self) -> ListMemoryBanksResponse: + return ListMemoryBanksResponse(data=await self.get_all_with_type("memory_bank")) async def get_memory_bank(self, memory_bank_id: str) -> Optional[MemoryBank]: return await self.get_object_by_identifier("memory_bank", memory_bank_id) @@ -365,8 +376,10 @@ class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks): class DatasetsRoutingTable(CommonRoutingTableImpl, 
Datasets): - async def list_datasets(self) -> List[Dataset]: - return await self.get_all_with_type(ResourceType.dataset.value) + async def list_datasets(self) -> ListDatasetsResponse: + return ListDatasetsResponse( + data=await self.get_all_with_type(ResourceType.dataset.value) + ) async def get_dataset(self, dataset_id: str) -> Optional[Dataset]: return await self.get_object_by_identifier("dataset", dataset_id) @@ -410,8 +423,10 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets): class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions): - async def list_scoring_functions(self) -> List[ScoringFn]: - return await self.get_all_with_type(ResourceType.scoring_function.value) + async def list_scoring_functions(self) -> ListScoringFunctionsResponse: + return ListScoringFunctionsResponse( + data=await self.get_all_with_type(ResourceType.scoring_function.value) + ) async def get_scoring_function(self, scoring_fn_id: str) -> Optional[ScoringFn]: return await self.get_object_by_identifier("scoring_function", scoring_fn_id) @@ -447,11 +462,11 @@ class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions): class EvalTasksRoutingTable(CommonRoutingTableImpl, EvalTasks): - async def list_eval_tasks(self) -> List[EvalTask]: - return await self.get_all_with_type(ResourceType.eval_task.value) + async def list_eval_tasks(self) -> ListEvalTasksResponse: + return ListEvalTasksResponse(data=await self.get_all_with_type("eval_task")) - async def get_eval_task(self, name: str) -> Optional[EvalTask]: - return await self.get_object_by_identifier("eval_task", name) + async def get_eval_task(self, eval_task_id: str) -> Optional[EvalTask]: + return await self.get_object_by_identifier("eval_task", eval_task_id) async def register_eval_task( self, @@ -485,14 +500,14 @@ class EvalTasksRoutingTable(CommonRoutingTableImpl, EvalTasks): class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): - async def list_tools(self, tool_group_id: 
Optional[str] = None) -> List[Tool]: + async def list_tools(self, toolgroup_id: Optional[str] = None) -> ListToolsResponse: tools = await self.get_all_with_type("tool") - if tool_group_id: - tools = [tool for tool in tools if tool.toolgroup_id == tool_group_id] - return tools + if toolgroup_id: + tools = [tool for tool in tools if tool.toolgroup_id == toolgroup_id] + return ListToolsResponse(data=tools) - async def list_tool_groups(self) -> List[ToolGroup]: - return await self.get_all_with_type("tool_group") + async def list_tool_groups(self) -> ListToolGroupsResponse: + return ListToolGroupsResponse(data=await self.get_all_with_type("tool_group")) async def get_tool_group(self, toolgroup_id: str) -> ToolGroup: return await self.get_object_by_identifier("tool_group", toolgroup_id) @@ -551,11 +566,11 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): ) ) - async def unregister_tool_group(self, tool_group_id: str) -> None: - tool_group = await self.get_tool_group(tool_group_id) + async def unregister_toolgroup(self, toolgroup_id: str) -> None: + tool_group = await self.get_tool_group(toolgroup_id) if tool_group is None: - raise ValueError(f"Tool group {tool_group_id} not found") - tools = await self.list_tools(tool_group_id) + raise ValueError(f"Tool group {toolgroup_id} not found") + tools = (await self.list_tools(toolgroup_id)).data for tool in tools: await self.unregister_object(tool) await self.unregister_object(tool_group) diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index 34334de77..2d216d314 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -14,16 +14,13 @@ import signal import sys import traceback import warnings - from contextlib import asynccontextmanager - from importlib.metadata import version as parse_version from pathlib import Path -from typing import Any, Union +from typing import Any, List, Union import yaml - -from fastapi 
import Body, FastAPI, HTTPException, Request +from fastapi import Body, FastAPI, HTTPException, Path as FastapiPath, Request from fastapi.exceptions import RequestValidationError from fastapi.responses import JSONResponse, StreamingResponse from pydantic import BaseModel, ValidationError @@ -31,7 +28,6 @@ from termcolor import cprint from typing_extensions import Annotated from llama_stack.distribution.datatypes import StackRunConfig - from llama_stack.distribution.distribution import builtin_automatically_routed_apis from llama_stack.distribution.request_headers import set_request_provider_data from llama_stack.distribution.resolver import InvalidProviderError @@ -41,13 +37,11 @@ from llama_stack.distribution.stack import ( replace_env_vars, validate_env_pair, ) - from llama_stack.providers.datatypes import Api from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig from llama_stack.providers.inline.telemetry.meta_reference.telemetry import ( TelemetryAdapter, ) - from llama_stack.providers.utils.telemetry.tracing import ( end_trace, setup_logger, @@ -56,7 +50,6 @@ from llama_stack.providers.utils.telemetry.tracing import ( from .endpoints import get_all_api_endpoints - REPO_ROOT = Path(__file__).parent.parent.parent.parent @@ -178,7 +171,7 @@ async def sse_generator(event_gen): ) -def create_dynamic_typed_route(func: Any, method: str): +def create_dynamic_typed_route(func: Any, method: str, route: str): async def endpoint(request: Request, **kwargs): set_request_provider_data(request.headers) @@ -196,6 +189,7 @@ def create_dynamic_typed_route(func: Any, method: str): raise translate_exception(e) from e sig = inspect.signature(func) + new_params = [ inspect.Parameter( "request", inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=Request @@ -203,12 +197,21 @@ def create_dynamic_typed_route(func: Any, method: str): ] new_params.extend(sig.parameters.values()) + path_params = extract_path_params(route) if method == "post": - # make 
sure every parameter is annotated with Body() so FASTAPI doesn't - # do anything too intelligent and ask for some parameters in the query - # and some in the body + # Annotate parameters that are in the path with Path(...) and others with Body(...) new_params = [new_params[0]] + [ - param.replace(annotation=Annotated[param.annotation, Body(..., embed=True)]) + ( + param.replace( + annotation=Annotated[ + param.annotation, FastapiPath(..., title=param.name) + ] + ) + if param.name in path_params + else param.replace( + annotation=Annotated[param.annotation, Body(..., embed=True)] + ) + ) for param in new_params[1:] ] @@ -386,6 +389,7 @@ def main(): create_dynamic_typed_route( impl_method, endpoint.method, + endpoint.route, ) ) @@ -409,5 +413,13 @@ def main(): uvicorn.run(app, host=listen_host, port=args.port) +def extract_path_params(route: str) -> List[str]: + segments = route.split("/") + params = [ + seg[1:-1] for seg in segments if seg.startswith("{") and seg.endswith("}") + ] + return params + + if __name__ == "__main__": main() diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index e3edf1e16..ad7bcd234 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/distribution/stack.py @@ -93,7 +93,11 @@ async def register_resources(run_config: StackRunConfig, impls: Dict[Api, Any]): await method(**obj.model_dump()) method = getattr(impls[api], list_method) - for obj in await method(): + response = await method() + + objects_to_process = response.data if hasattr(response, "data") else response + + for obj in objects_to_process: log.info( f"{rsrc.capitalize()}: {colored(obj.identifier, 'white', attrs=['bold'])} served by {colored(obj.provider_id, 'white', attrs=['bold'])}", ) diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 2299e80d1..2ebc7ded1 100644 --- 
a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -624,6 +624,10 @@ class ChatAgent(ShieldRunnerMixin): step_type=StepType.tool_execution.value, step_id=step_id, tool_call=tool_call, + delta=ToolCallDelta( + parse_status=ToolCallParseStatus.in_progress, + content=tool_call, + ), ) ) ) @@ -735,8 +739,8 @@ class ChatAgent(ShieldRunnerMixin): for toolgroup_name in agent_config_toolgroups: if toolgroup_name not in toolgroups_for_turn_set: continue - tools = await self.tool_groups_api.list_tools(tool_group_id=toolgroup_name) - for tool_def in tools: + tools = await self.tool_groups_api.list_tools(toolgroup_id=toolgroup_name) + for tool_def in tools.data: if ( toolgroup_name.startswith("builtin") and toolgroup_name != MEMORY_GROUP diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index faff716ce..d22ef82ab 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -223,5 +223,5 @@ class MetaReferenceAgentsImpl(Agents): async def delete_agents_session(self, agent_id: str, session_id: str) -> None: await self.persistence_store.delete(f"session:{agent_id}:{session_id}") - async def delete_agents(self, agent_id: str) -> None: + async def delete_agent(self, agent_id: str) -> None: await self.persistence_store.delete(f"agent:{agent_id}") diff --git a/llama_stack/providers/inline/post_training/torchtune/post_training.py b/llama_stack/providers/inline/post_training/torchtune/post_training.py index 90fbf7026..4abe13de2 100644 --- a/llama_stack/providers/inline/post_training/torchtune/post_training.py +++ b/llama_stack/providers/inline/post_training/torchtune/post_training.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
from datetime import datetime -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Optional from llama_models.schema_utils import webmethod @@ -14,6 +14,7 @@ from llama_stack.apis.post_training import ( AlgorithmConfig, DPOAlignmentConfig, JobStatus, + ListPostTrainingJobsResponse, LoraFinetuningConfig, PostTrainingJob, PostTrainingJobArtifactsResponse, @@ -114,8 +115,8 @@ class TorchtunePostTrainingImpl: logger_config: Dict[str, Any], ) -> PostTrainingJob: ... - async def get_training_jobs(self) -> List[PostTrainingJob]: - return self.jobs_list + async def get_training_jobs(self) -> ListPostTrainingJobsResponse: + return ListPostTrainingJobsResponse(data=self.jobs_list) @webmethod(route="/post-training/job/status") async def get_training_job_status( diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index 332a150cf..e2e318375 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -249,7 +249,7 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): order_by=order_by, ) - async def get_span_tree( + async def query_span_tree( self, span_id: str, attributes_to_return: Optional[List[str]] = None, diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py index 19a4064a0..747b64dd1 100644 --- a/tests/client-sdk/agents/test_agents.py +++ b/tests/client-sdk/agents/test_agents.py @@ -83,13 +83,13 @@ class TestClientTool(ClientTool): def agent_config(llama_stack_client): available_models = [ model.identifier - for model in llama_stack_client.models.list() + for model in llama_stack_client.models.list().data if model.identifier.startswith("meta-llama") and "405" not in model.identifier ] model_id = available_models[0] print(f"Using model: {model_id}") available_shields = [ - shield.identifier for shield in 
llama_stack_client.shields.list() + shield.identifier for shield in llama_stack_client.shields.list().data ] available_shields = available_shields[:1] print(f"Using shield: {available_shields}")