diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py index c27bc6440..54031d839 100644 --- a/docs/openapi_generator/generate.py +++ b/docs/openapi_generator/generate.py @@ -16,7 +16,7 @@ import sys import fire import ruamel.yaml as yaml -from llama_stack.apis.version import LLAMA_STACK_API_VERSION # noqa: E402 +from llama_stack.apis.version import LLAMA_STACK_API_V1 # noqa: E402 from llama_stack.core.stack import LlamaStack # noqa: E402 from .pyopenapi.options import Options # noqa: E402 @@ -25,7 +25,7 @@ from .pyopenapi.utility import Specification, validate_api # noqa: E402 def str_presenter(dumper, data): - if data.startswith(f"/{LLAMA_STACK_API_VERSION}") or data.startswith( + if data.startswith(f"/{LLAMA_STACK_API_V1}") or data.startswith( "#/components/schemas/" ): style = None @@ -58,7 +58,7 @@ def main(output_dir: str): server=Server(url="http://any-hosted-llama-stack.com"), info=Info( title="Llama Stack Specification", - version=LLAMA_STACK_API_VERSION, + version=LLAMA_STACK_API_V1, description="""This is the specification of the Llama Stack that provides a set of endpoints and their corresponding interfaces that are tailored to best leverage Llama Models.""", diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py index 5ac712f02..cdbf1c60c 100644 --- a/docs/openapi_generator/pyopenapi/generator.py +++ b/docs/openapi_generator/pyopenapi/generator.py @@ -829,7 +829,7 @@ class Generator: else: raise NotImplementedError(f"unknown HTTP method: {op.http_method}") - route = op.get_route() + route = op.get_route(op.webmethod) route = route.replace(":path", "") print(f"route: {route}") if route in paths: diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py index 045e33848..ce33d3bb9 100644 --- a/docs/openapi_generator/pyopenapi/operations.py +++ b/docs/openapi_generator/pyopenapi/operations.py @@ -11,7 +11,7 @@ import typing from dataclasses import dataclass from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union -from llama_stack.apis.version import LLAMA_STACK_API_VERSION +from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA from termcolor import colored @@ -113,11 +113,13 @@ class EndpointOperation: request_examples: Optional[List[Any]] = None response_examples: Optional[List[Any]] = None - def get_route(self) -> str: - if self.route is not None: - return "/".join(["", LLAMA_STACK_API_VERSION, self.route.lstrip("/")]) + def get_route(self, webmethod) -> str: + api_level = webmethod.level - route_parts = ["", LLAMA_STACK_API_VERSION, self.name] + if self.route is not None: + return "/".join(["", api_level, self.route.lstrip("/")]) + + route_parts = ["", api_level, self.name] for param_name, _ in self.path_params: route_parts.append("{" + param_name + "}") return "/".join(route_parts) @@ -152,33 +154,39 @@ def _get_endpoint_functions( functions = inspect.getmembers(endpoint, inspect.isfunction) for func_name, func_ref in functions: - webmethod = getattr(func_ref, "__webmethod__", None) - if not webmethod: + webmethods = [] + + # Check for multiple webmethods (stacked decorators) + if hasattr(func_ref, "__webmethods__"): + webmethods = func_ref.__webmethods__ + + if not webmethods: continue - print(f"Processing {colored(func_name, 'white')}...") - operation_name = func_name - - if webmethod.method == "GET": - prefix = "get" - elif webmethod.method == "DELETE": - prefix = "delete" - elif webmethod.method == "POST": - prefix = "post" - elif operation_name.startswith("get_") or operation_name.endswith("/get"): - prefix = "get" - elif ( - operation_name.startswith("delete_") - or operation_name.startswith("remove_") - or operation_name.endswith("/delete") - or operation_name.endswith("/remove") - ): - prefix = "delete" - else: - # by default everything else is a POST - prefix = "post" + for webmethod in webmethods: + print(f"Processing {colored(func_name, 'white')}...") + operation_name = func_name + + if webmethod.method == "GET": + prefix = "get" + elif webmethod.method == "DELETE": + prefix = "delete" + elif webmethod.method == "POST": + prefix = "post" + elif operation_name.startswith("get_") or operation_name.endswith("/get"): + prefix = "get" + elif ( + operation_name.startswith("delete_") + or operation_name.startswith("remove_") + or operation_name.endswith("/delete") + or operation_name.endswith("/remove") + ): + prefix = "delete" + else: + # by default everything else is a POST + prefix = "post" - yield prefix, operation_name, func_name, func_ref + yield prefix, operation_name, func_name, func_ref def _get_defining_class(member_fn: str, derived_cls: type) -> type: @@ -239,105 +247,101 @@ def get_endpoint_operations( "update", ], ): - # extract routing information from function metadata - webmethod = getattr(func_ref, "__webmethod__", None) - if webmethod is not None: + # Get all webmethods for this function + webmethods = getattr(func_ref, "__webmethods__", []) + + # Create one EndpointOperation for each webmethod + for webmethod in webmethods: route = webmethod.route route_params = _get_route_parameters(route) if route is not None else None public = webmethod.public request_examples = webmethod.request_examples response_examples = webmethod.response_examples - else: - route = None - route_params = None - public = False - request_examples = None - response_examples = None - # inspect function signature for path and query parameters, and request/response payload type - signature = get_signature(func_ref) + # inspect function signature for path and query parameters, and request/response payload type + signature = get_signature(func_ref) - path_params = [] - query_params = [] - request_params = [] - multipart_params = [] + path_params = [] + query_params = [] + request_params = [] + multipart_params = [] - for param_name, parameter in signature.parameters.items(): - param_type = _get_annotation_type(parameter.annotation, func_ref) + for param_name, parameter in signature.parameters.items(): + param_type = _get_annotation_type(parameter.annotation, func_ref) - # omit "self" for instance methods - if param_name == "self" and param_type is inspect.Parameter.empty: - continue + # omit "self" for instance methods + if param_name == "self" and param_type is inspect.Parameter.empty: + continue - # check if all parameters have explicit type - if parameter.annotation is inspect.Parameter.empty: + # check if all parameters have explicit type + if parameter.annotation is inspect.Parameter.empty: + raise ValidationError( + f"parameter '{param_name}' in function '{func_name}' has no type annotation" + ) + + is_multipart = _is_multipart_param(param_type) + + if prefix in ["get", "delete"]: + if route_params is not None and param_name in route_params: + path_params.append((param_name, param_type)) + else: + query_params.append((param_name, param_type)) + else: + if route_params is not None and param_name in route_params: + path_params.append((param_name, param_type)) + elif is_multipart: + multipart_params.append((param_name, param_type)) + else: + request_params.append((param_name, param_type)) + + # check if function has explicit return type + if signature.return_annotation is inspect.Signature.empty: raise ValidationError( - f"parameter '{param_name}' in function '{func_name}' has no type annotation" + f"function '{func_name}' has no return type annotation" ) - is_multipart = _is_multipart_param(param_type) - - if prefix in ["get", "delete"]: - if route_params is not None and param_name in route_params: - path_params.append((param_name, param_type)) - else: - query_params.append((param_name, param_type)) + return_type = _get_annotation_type(signature.return_annotation, func_ref) + + # operations that produce events are labeled as Generator[YieldType, SendType, ReturnType] + # where YieldType is the event type, SendType is None, and ReturnType is the immediate response type to the request + if typing.get_origin(return_type) is collections.abc.Generator: + event_type, send_type, response_type = typing.get_args(return_type) + if send_type is not type(None): + raise ValidationError( + f"function '{func_name}' has a return type Generator[Y,S,R] and therefore looks like an event but has an explicit send type" + ) else: - if route_params is not None and param_name in route_params: - path_params.append((param_name, param_type)) - elif is_multipart: - multipart_params.append((param_name, param_type)) + event_type = None + + def process_type(t): + if typing.get_origin(t) is collections.abc.AsyncIterator: + # NOTE(ashwin): this is SSE and there is no way to represent it. either we make it a List + # or the item type. I am choosing it to be the latter + args = typing.get_args(t) + return args[0] + elif typing.get_origin(t) is typing.Union: + types = [process_type(a) for a in typing.get_args(t)] + return typing._UnionGenericAlias(typing.Union, tuple(types)) + else: + return t + + response_type = process_type(return_type) + + if prefix in ["delete", "remove"]: + http_method = HTTPMethod.DELETE + elif prefix == "post": + http_method = HTTPMethod.POST + elif prefix == "get": + http_method = HTTPMethod.GET + elif prefix == "set": + http_method = HTTPMethod.PUT + elif prefix == "update": + http_method = HTTPMethod.PATCH else: - request_params.append((param_name, param_type)) + raise ValidationError(f"unknown prefix {prefix}") - # check if function has explicit return type - if signature.return_annotation is inspect.Signature.empty: - raise ValidationError( - f"function '{func_name}' has no return type annotation" - ) - - return_type = _get_annotation_type(signature.return_annotation, func_ref) - - # operations that produce events are labeled as Generator[YieldType, SendType, ReturnType] - # where YieldType is the event type, SendType is None, and ReturnType is the immediate response type to the request - if typing.get_origin(return_type) is collections.abc.Generator: - event_type, send_type, response_type = typing.get_args(return_type) - if send_type is not type(None): - raise ValidationError( - f"function '{func_name}' has a return type Generator[Y,S,R] and therefore looks like an event but has an explicit send type" - ) - else: - event_type = None - - def process_type(t): - if typing.get_origin(t) is collections.abc.AsyncIterator: - # NOTE(ashwin): this is SSE and there is no way to represent it. either we make it a List - # or the item type. I am choosing it to be the latter - args = typing.get_args(t) - return args[0] - elif typing.get_origin(t) is typing.Union: - types = [process_type(a) for a in typing.get_args(t)] - return typing._UnionGenericAlias(typing.Union, tuple(types)) - else: - return t - - response_type = process_type(return_type) - - if prefix in ["delete", "remove"]: - http_method = HTTPMethod.DELETE - elif prefix == "post": - http_method = HTTPMethod.POST - elif prefix == "get": - http_method = HTTPMethod.GET - elif prefix == "set": - http_method = HTTPMethod.PUT - elif prefix == "update": - http_method = HTTPMethod.PATCH - else: - raise ValidationError(f"unknown prefix {prefix}") - - result.append( - EndpointOperation( + # Create an EndpointOperation for this specific webmethod + operation = EndpointOperation( defining_class=_get_defining_class(func_name, endpoint), name=operation_name, func_name=func_name, @@ -354,7 +358,10 @@ def get_endpoint_operations( request_examples=request_examples if use_examples else None, response_examples=response_examples if use_examples else None, ) - ) + + # Store the specific webmethod with this operation + operation.webmethod = webmethod + result.append(operation) if not result: raise ValidationError(f"no eligible endpoint operations in type {endpoint}") diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 36cb025c4..5a61221c1 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -175,6 +175,43 @@ } } }, + "/v1alpha/post-training/job/cancel": { + "post": { + "responses": { + "200": { + "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "PostTraining (Coming Soon)" + ], + "summary": "Cancel a training job.", + "description": "Cancel a training job.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CancelTrainingJobRequest" + } + } + }, + "required": true + } + } + }, "/v1/post-training/job/cancel": { "post": { "responses": { @@ -1179,6 +1216,60 @@ } } }, + "/v1alpha/eval/benchmarks/{benchmark_id}/evaluations": { + "post": { + "responses": { + "200": { + "description": "EvaluateResponse object containing generations and scores.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EvaluateResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Eval" + ], + "summary": "Evaluate a list of rows on a benchmark.", + "description": "Evaluate a list of rows on a benchmark.", + "parameters": [ + { + "name": "benchmark_id", + "in": "path", + "description": "The ID of the benchmark to run the evaluation on.", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EvaluateRowsRequest" + } + } + }, + "required": true + } + } + }, "/v1/eval/benchmarks/{benchmark_id}/evaluations": { "post": { "responses": { @@ -1366,6 +1457,85 @@ ] } }, + "/v1alpha/eval/benchmarks/{benchmark_id}": { + "get": { + "responses": { + "200": { + "description": "A Benchmark.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Benchmark" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Benchmarks" + ], + "summary": "Get a benchmark by its ID.", + "description": "Get a benchmark by its ID.", + "parameters": [ + { + "name": "benchmark_id", + "in": "path", + "description": "The ID of the benchmark to get.", + "required": true, + "schema": { + "type": "string" + } + } + ] + }, + "delete": { + "responses": { + "200": { + "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Benchmarks" + ], + "summary": "Unregister a benchmark.", + "description": "Unregister a benchmark.", + "parameters": [ + { + "name": "benchmark_id", + "in": "path", + "description": "The ID of the benchmark to unregister.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/eval/benchmarks/{benchmark_id}": { "get": { "responses": { @@ -2079,6 +2249,50 @@ ] } }, + "/v1alpha/post-training/job/artifacts": { + "get": { + "responses": { + "200": { + "description": "A PostTrainingJobArtifactsResponse.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PostTrainingJobArtifactsResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "PostTraining (Coming Soon)" + ], + "summary": "Get the artifacts of a training job.", + "description": "Get the artifacts of a training job.", + "parameters": [ + { + "name": "job_uuid", + "in": "query", + "description": "The UUID of the job to get the artifacts of.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/post-training/job/artifacts": { "get": { "responses": { @@ -2123,6 +2337,50 @@ ] } }, + "/v1alpha/post-training/job/status": { + "get": { + "responses": { + "200": { + "description": "A PostTrainingJobStatusResponse.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PostTrainingJobStatusResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "PostTraining (Coming Soon)" + ], + "summary": "Get the status of a training job.", + "description": "Get the status of a training job.", + "parameters": [ + { + "name": "job_uuid", + "in": "query", + "description": "The UUID of the job to get the status of.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/post-training/job/status": { "get": { "responses": { @@ -2167,6 +2425,40 @@ ] } }, + "/v1alpha/post-training/jobs": { + "get": { + "responses": { + "200": { + "description": "A ListPostTrainingJobsResponse.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListPostTrainingJobsResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "PostTraining (Coming Soon)" + ], + "summary": "Get all training jobs.", + "description": "Get all training jobs.", + "parameters": [] + } + }, "/v1/post-training/jobs": { "get": { "responses": { @@ -2538,6 +2830,103 @@ ] } }, + "/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}": { + "get": { + "responses": { + "200": { + "description": "The status of the evaluation job.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Job" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Eval" + ], + "summary": "Get the status of a job.", + "description": "Get the status of a job.", + "parameters": [ + { + "name": "benchmark_id", + "in": "path", + "description": "The ID of the benchmark to run the evaluation on.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "job_id", + "in": "path", + "description": "The ID of the job to get the status of.", + "required": true, + "schema": { + "type": "string" + } + } + ] + }, + "delete": { + "responses": { + "200": { + "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Eval" + ], + "summary": "Cancel a job.", + "description": "Cancel a job.", + "parameters": [ + { + "name": "benchmark_id", + "in": "path", + "description": "The ID of the benchmark to run the evaluation on.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "job_id", + "in": "path", + "description": "The ID of the job to cancel.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": { "get": { "responses": { @@ -2635,6 +3024,59 @@ ] } }, + "/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": { + "get": { + "responses": { + "200": { + "description": "The result of the job.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EvaluateResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Eval" + ], + "summary": "Get the result of a job.", + "description": "Get the result of a job.", + "parameters": [ + { + "name": "benchmark_id", + "in": "path", + "description": "The ID of the benchmark to run the evaluation on.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "job_id", + "in": "path", + "description": "The ID of the job to get the result of.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": { "get": { "responses": { @@ -2750,6 +3192,75 @@ ] } }, + "/v1alpha/eval/benchmarks": { + "get": { + "responses": { + "200": { + "description": "A ListBenchmarksResponse.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListBenchmarksResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Benchmarks" + ], + "summary": "List all benchmarks.", + "description": "List all benchmarks.", + "parameters": [] + }, + "post": { + "responses": { + "200": { + "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Benchmarks" + ], + "summary": "Register a benchmark.", + "description": "Register a benchmark.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RegisterBenchmarkRequest" + } + } + }, + "required": true + } + } + }, "/v1/eval/benchmarks": { "get": { "responses": { @@ -4783,6 +5294,50 @@ } } }, + "/v1alpha/post-training/preference-optimize": { + "post": { + "responses": { + "200": { + "description": "A PostTrainingJob.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PostTrainingJob" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "PostTraining (Coming Soon)" + ], + "summary": "Run preference optimization of a model.", + "description": "Run preference optimization of a model.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PreferenceOptimizeRequest" + } + } + }, + "required": true + } + } + }, "/v1/post-training/preference-optimize": { "post": { "responses": { @@ -5178,6 +5733,60 @@ } } }, + "/v1alpha/eval/benchmarks/{benchmark_id}/jobs": { + "post": { + "responses": { + "200": { + "description": "The job that was created to run the evaluation.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Job" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Eval" + ], + "summary": "Run an evaluation on a benchmark.", + "description": "Run an evaluation on a benchmark.", + "parameters": [ + { + "name": "benchmark_id", + "in": "path", + "description": "The ID of the benchmark to run the evaluation on.", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RunEvalRequest" + } + } + }, + "required": true + } + } + }, "/v1/eval/benchmarks/{benchmark_id}/jobs": { "post": { "responses": { @@ -5499,6 +6108,50 @@ } } }, + "/v1alpha/post-training/supervised-fine-tune": { + "post": { + "responses": { + "200": { + "description": "A PostTrainingJob.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PostTrainingJob" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "PostTraining (Coming Soon)" + ], + "summary": "Run supervised fine-tuning of a model.", + "description": "Run supervised fine-tuning of a model.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SupervisedFineTuneRequest" + } + } + }, + "required": true + } + } + }, "/v1/post-training/supervised-fine-tune": { "post": { "responses": { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index c4b82b630..9c0558658 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -109,6 +109,32 @@ paths: schema: $ref: '#/components/schemas/BatchCompletionRequest' required: true + /v1alpha/post-training/job/cancel: + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Cancel a training job. + description: Cancel a training job. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CancelTrainingJobRequest' + required: true /v1/post-training/job/cancel: post: responses: @@ -832,6 +858,44 @@ paths: schema: $ref: '#/components/schemas/EmbeddingsRequest' required: true + /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: + post: + responses: + '200': + description: >- + EvaluateResponse object containing generations and scores. + content: + application/json: + schema: + $ref: '#/components/schemas/EvaluateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + summary: Evaluate a list of rows on a benchmark. + description: Evaluate a list of rows on a benchmark. + parameters: + - name: benchmark_id + in: path + description: >- + The ID of the benchmark to run the evaluation on. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/EvaluateRowsRequest' + required: true /v1/eval/benchmarks/{benchmark_id}/evaluations: post: responses: @@ -962,6 +1026,61 @@ paths: required: true schema: type: string + /v1alpha/eval/benchmarks/{benchmark_id}: + get: + responses: + '200': + description: A Benchmark. + content: + application/json: + schema: + $ref: '#/components/schemas/Benchmark' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Benchmarks + summary: Get a benchmark by its ID. + description: Get a benchmark by its ID. + parameters: + - name: benchmark_id + in: path + description: The ID of the benchmark to get. + required: true + schema: + type: string + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Benchmarks + summary: Unregister a benchmark. + description: Unregister a benchmark. + parameters: + - name: benchmark_id + in: path + description: The ID of the benchmark to unregister. + required: true + schema: + type: string /v1/eval/benchmarks/{benchmark_id}: get: responses: @@ -1458,6 +1577,37 @@ paths: required: true schema: type: string + /v1alpha/post-training/job/artifacts: + get: + responses: + '200': + description: A PostTrainingJobArtifactsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Get the artifacts of a training job. + description: Get the artifacts of a training job. + parameters: + - name: job_uuid + in: query + description: >- + The UUID of the job to get the artifacts of. + required: true + schema: + type: string /v1/post-training/job/artifacts: get: responses: @@ -1489,6 +1639,37 @@ paths: required: true schema: type: string + /v1alpha/post-training/job/status: + get: + responses: + '200': + description: A PostTrainingJobStatusResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/PostTrainingJobStatusResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Get the status of a training job. + description: Get the status of a training job. + parameters: + - name: job_uuid + in: query + description: >- + The UUID of the job to get the status of. + required: true + schema: + type: string /v1/post-training/job/status: get: responses: @@ -1520,6 +1701,30 @@ paths: required: true schema: type: string + /v1alpha/post-training/jobs: + get: + responses: + '200': + description: A ListPostTrainingJobsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListPostTrainingJobsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Get all training jobs. + description: Get all training jobs. + parameters: [] /v1/post-training/jobs: get: responses: @@ -1804,6 +2009,75 @@ paths: required: false schema: type: integer + /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}: + get: + responses: + '200': + description: The status of the evaluation job. + content: + application/json: + schema: + $ref: '#/components/schemas/Job' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + summary: Get the status of a job. + description: Get the status of a job. + parameters: + - name: benchmark_id + in: path + description: >- + The ID of the benchmark to run the evaluation on. + required: true + schema: + type: string + - name: job_id + in: path + description: The ID of the job to get the status of. + required: true + schema: + type: string + delete: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + summary: Cancel a job. + description: Cancel a job. + parameters: + - name: benchmark_id + in: path + description: >- + The ID of the benchmark to run the evaluation on. + required: true + schema: + type: string + - name: job_id + in: path + description: The ID of the job to cancel. + required: true + schema: + type: string /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}: get: responses: @@ -1873,6 +2147,43 @@ paths: required: true schema: type: string + /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result: + get: + responses: + '200': + description: The result of the job. + content: + application/json: + schema: + $ref: '#/components/schemas/EvaluateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + summary: Get the result of a job. + description: Get the result of a job. + parameters: + - name: benchmark_id + in: path + description: >- + The ID of the benchmark to run the evaluation on. + required: true + schema: + type: string + - name: job_id + in: path + description: The ID of the job to get the result of. + required: true + schema: + type: string /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result: get: responses: @@ -1953,6 +2264,55 @@ paths: required: false schema: type: integer + /v1alpha/eval/benchmarks: + get: + responses: + '200': + description: A ListBenchmarksResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListBenchmarksResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Benchmarks + summary: List all benchmarks. + description: List all benchmarks. + parameters: [] + post: + responses: + '200': + description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Benchmarks + summary: Register a benchmark. + description: Register a benchmark. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterBenchmarkRequest' + required: true /v1/eval/benchmarks: get: responses: @@ -3454,6 +3814,36 @@ paths: schema: $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest' required: true + /v1alpha/post-training/preference-optimize: + post: + responses: + '200': + description: A PostTrainingJob. + content: + application/json: + schema: + $ref: '#/components/schemas/PostTrainingJob' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Run preference optimization of a model. + description: Run preference optimization of a model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/PreferenceOptimizeRequest' + required: true /v1/post-training/preference-optimize: post: responses: @@ -3735,6 +4125,44 @@ paths: schema: $ref: '#/components/schemas/ResumeAgentTurnRequest' required: true + /v1alpha/eval/benchmarks/{benchmark_id}/jobs: + post: + responses: + '200': + description: >- + The job that was created to run the evaluation. + content: + application/json: + schema: + $ref: '#/components/schemas/Job' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + summary: Run an evaluation on a benchmark. + description: Run an evaluation on a benchmark. + parameters: + - name: benchmark_id + in: path + description: >- + The ID of the benchmark to run the evaluation on. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RunEvalRequest' + required: true /v1/eval/benchmarks/{benchmark_id}/jobs: post: responses: @@ -3961,6 +4389,36 @@ paths: schema: $ref: '#/components/schemas/SetDefaultVersionRequest' required: true + /v1alpha/post-training/supervised-fine-tune: + post: + responses: + '200': + description: A PostTrainingJob. + content: + application/json: + schema: + $ref: '#/components/schemas/PostTrainingJob' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - PostTraining (Coming Soon) + summary: Run supervised fine-tuning of a model. + description: Run supervised fine-tuning of a model. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/SupervisedFineTuneRequest' + required: true /v1/post-training/supervised-fine-tune: post: responses: diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index 7dd3e9289..e53ca82e2 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -27,6 +27,7 @@ from llama_stack.apis.inference import ( ) from llama_stack.apis.safety import SafetyViolation from llama_stack.apis.tools import ToolDef +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from .openai_responses import ( @@ -481,7 +482,7 @@ class Agents(Protocol): - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details. """ - @webmethod(route="/agents", method="POST", descriptive_name="create_agent") + @webmethod(route="/agents", method="POST", descriptive_name="create_agent", level=LLAMA_STACK_API_V1) async def create_agent( self, agent_config: AgentConfig, @@ -494,7 +495,10 @@ class Agents(Protocol): ... @webmethod( - route="/agents/{agent_id}/session/{session_id}/turn", method="POST", descriptive_name="create_agent_turn" + route="/agents/{agent_id}/session/{session_id}/turn", + method="POST", + descriptive_name="create_agent_turn", + level=LLAMA_STACK_API_V1, ) async def create_agent_turn( self, @@ -524,6 +528,7 @@ class Agents(Protocol): route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume", method="POST", descriptive_name="resume_agent_turn", + level=LLAMA_STACK_API_V1, ) async def resume_agent_turn( self, @@ -549,6 +554,7 @@ class Agents(Protocol): @webmethod( route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}", method="GET", + level=LLAMA_STACK_API_V1, ) async def get_agents_turn( self, @@ -568,6 +574,7 @@ class Agents(Protocol): @webmethod( route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}", method="GET", + level=LLAMA_STACK_API_V1, ) async def get_agents_step( self, @@ -586,7 +593,12 @@ class Agents(Protocol): """ ... - @webmethod(route="/agents/{agent_id}/session", method="POST", descriptive_name="create_agent_session") + @webmethod( + route="/agents/{agent_id}/session", + method="POST", + descriptive_name="create_agent_session", + level=LLAMA_STACK_API_V1, + ) async def create_agent_session( self, agent_id: str, @@ -600,7 +612,7 @@ class Agents(Protocol): """ ... - @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET") + @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET", level=LLAMA_STACK_API_V1) async def get_agents_session( self, session_id: str, @@ -616,7 +628,7 @@ class Agents(Protocol): """ ... - @webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE") + @webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE", level=LLAMA_STACK_API_V1) async def delete_agents_session( self, session_id: str, @@ -629,7 +641,7 @@ class Agents(Protocol): """ ... - @webmethod(route="/agents/{agent_id}", method="DELETE") + @webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1) async def delete_agent( self, agent_id: str, @@ -640,7 +652,7 @@ class Agents(Protocol): """ ... - @webmethod(route="/agents", method="GET") + @webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1) async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse: """List all agents. @@ -650,7 +662,7 @@ class Agents(Protocol): """ ... - @webmethod(route="/agents/{agent_id}", method="GET") + @webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1) async def get_agent(self, agent_id: str) -> Agent: """Describe an agent by its ID. @@ -659,7 +671,7 @@ class Agents(Protocol): """ ... - @webmethod(route="/agents/{agent_id}/sessions", method="GET") + @webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1) async def list_agent_sessions( self, agent_id: str, @@ -682,7 +694,7 @@ class Agents(Protocol): # # Both of these APIs are inherently stateful. - @webmethod(route="/openai/v1/responses/{response_id}", method="GET") + @webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1) async def get_openai_response( self, response_id: str, @@ -694,7 +706,7 @@ class Agents(Protocol): """ ... - @webmethod(route="/openai/v1/responses", method="POST") + @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1) async def create_openai_response( self, input: str | list[OpenAIResponseInput], @@ -719,7 +731,7 @@ class Agents(Protocol): """ ... - @webmethod(route="/openai/v1/responses", method="GET") + @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1) async def list_openai_responses( self, after: str | None = None, @@ -737,7 +749,7 @@ class Agents(Protocol): """ ... - @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET") + @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1) async def list_openai_response_input_items( self, response_id: str, @@ -759,7 +771,7 @@ class Agents(Protocol): """ ... - @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE") + @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1) async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject: """Delete an OpenAI response by its ID. diff --git a/llama_stack/apis/batch_inference/batch_inference.py b/llama_stack/apis/batch_inference/batch_inference.py index b2aa637e2..43ade0221 100644 --- a/llama_stack/apis/batch_inference/batch_inference.py +++ b/llama_stack/apis/batch_inference/batch_inference.py @@ -17,6 +17,7 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.schema_utils import webmethod @@ -30,7 +31,7 @@ class BatchInference(Protocol): including (post-training, evals, etc). """ - @webmethod(route="/batch-inference/completion", method="POST") + @webmethod(route="/batch-inference/completion", method="POST", level=LLAMA_STACK_API_V1) async def completion( self, model: str, @@ -50,7 +51,7 @@ class BatchInference(Protocol): """ ... - @webmethod(route="/batch-inference/chat-completion", method="POST") + @webmethod(route="/batch-inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1) async def chat_completion( self, model: str, diff --git a/llama_stack/apis/batches/batches.py b/llama_stack/apis/batches/batches.py index c6bbd92eb..5890cbe04 100644 --- a/llama_stack/apis/batches/batches.py +++ b/llama_stack/apis/batches/batches.py @@ -8,6 +8,7 @@ from typing import Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.schema_utils import json_schema_type, webmethod try: @@ -42,7 +43,7 @@ class Batches(Protocol): Note: This API is currently under active development and may undergo changes. """ - @webmethod(route="/openai/v1/batches", method="POST") + @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1) async def create_batch( self, input_file_id: str, @@ -62,7 +63,7 @@ class Batches(Protocol): """ ... - @webmethod(route="/openai/v1/batches/{batch_id}", method="GET") + @webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1) async def retrieve_batch(self, batch_id: str) -> BatchObject: """Retrieve information about a specific batch. @@ -71,7 +72,7 @@ class Batches(Protocol): """ ... - @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST") + @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1) async def cancel_batch(self, batch_id: str) -> BatchObject: """Cancel a batch that is in progress. @@ -80,7 +81,7 @@ class Batches(Protocol): """ ... - @webmethod(route="/openai/v1/batches", method="GET") + @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1) async def list_batches( self, after: str | None = None, diff --git a/llama_stack/apis/benchmarks/benchmarks.py b/llama_stack/apis/benchmarks/benchmarks.py index 8d0a25e7b..d87d45a60 100644 --- a/llama_stack/apis/benchmarks/benchmarks.py +++ b/llama_stack/apis/benchmarks/benchmarks.py @@ -8,6 +8,7 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field from llama_stack.apis.resource import Resource, ResourceType +from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA from llama_stack.schema_utils import json_schema_type, webmethod @@ -53,7 +54,8 @@ class ListBenchmarksResponse(BaseModel): @runtime_checkable class Benchmarks(Protocol): - @webmethod(route="/eval/benchmarks", method="GET") + @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) + @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA) async def list_benchmarks(self) -> ListBenchmarksResponse: """List all benchmarks. @@ -61,7 +63,8 @@ class Benchmarks(Protocol): """ ... - @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET") + @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) + @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA) async def get_benchmark( self, benchmark_id: str, @@ -73,7 +76,8 @@ class Benchmarks(Protocol): """ ... - @webmethod(route="/eval/benchmarks", method="POST") + @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) + @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA) async def register_benchmark( self, benchmark_id: str, @@ -94,7 +98,8 @@ class Benchmarks(Protocol): """ ... - @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE") + @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True) + @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA) async def unregister_benchmark(self, benchmark_id: str) -> None: """Unregister a benchmark. diff --git a/llama_stack/apis/datasetio/datasetio.py b/llama_stack/apis/datasetio/datasetio.py index 1183983cc..27e5336bc 100644 --- a/llama_stack/apis/datasetio/datasetio.py +++ b/llama_stack/apis/datasetio/datasetio.py @@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.apis.datasets import Dataset +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.schema_utils import webmethod @@ -20,7 +21,7 @@ class DatasetIO(Protocol): # keeping for aligning with inference/safety, but this is not used dataset_store: DatasetStore - @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET") + @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1) async def iterrows( self, dataset_id: str, @@ -44,7 +45,7 @@ class DatasetIO(Protocol): """ ... - @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST") + @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1) async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None: """Append rows to a dataset. diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index f347e0e29..be0cbf09a 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -10,6 +10,7 @@ from typing import Annotated, Any, Literal, Protocol from pydantic import BaseModel, Field from llama_stack.apis.resource import Resource, ResourceType +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod @@ -145,7 +146,7 @@ class ListDatasetsResponse(BaseModel): class Datasets(Protocol): - @webmethod(route="/datasets", method="POST") + @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1) async def register_dataset( self, purpose: DatasetPurpose, @@ -214,7 +215,7 @@ class Datasets(Protocol): """ ... - @webmethod(route="/datasets/{dataset_id:path}", method="GET") + @webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1) async def get_dataset( self, dataset_id: str, @@ -226,7 +227,7 @@ class Datasets(Protocol): """ ... - @webmethod(route="/datasets", method="GET") + @webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1) async def list_datasets(self) -> ListDatasetsResponse: """List all datasets. @@ -234,7 +235,7 @@ class Datasets(Protocol): """ ... - @webmethod(route="/datasets/{dataset_id:path}", method="DELETE") + @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1) async def unregister_dataset( self, dataset_id: str, diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py index 83a0a8e56..bb81778f1 100644 --- a/llama_stack/apis/eval/eval.py +++ b/llama_stack/apis/eval/eval.py @@ -13,6 +13,7 @@ from llama_stack.apis.common.job_types import Job from llama_stack.apis.inference import SamplingParams, SystemMessage from llama_stack.apis.scoring import ScoringResult from llama_stack.apis.scoring_functions import ScoringFnParams +from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA from llama_stack.schema_utils import json_schema_type, register_schema, webmethod @@ -83,7 +84,8 @@ class EvaluateResponse(BaseModel): class Eval(Protocol): """Llama Stack Evaluation API for running evaluations on model and agent candidates.""" - @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST") + @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) + @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA) async def run_eval( self, benchmark_id: str, @@ -97,7 +99,10 @@ class Eval(Protocol): """ ... - @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST") + @webmethod( + route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True + ) + @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA) async def evaluate_rows( self, benchmark_id: str, @@ -115,7 +120,10 @@ class Eval(Protocol): """ ... - @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET") + @webmethod( + route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True + ) + @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA) async def job_status(self, benchmark_id: str, job_id: str) -> Job: """Get the status of a job. @@ -125,7 +133,13 @@ class Eval(Protocol): """ ... - @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE") + @webmethod( + route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", + method="DELETE", + level=LLAMA_STACK_API_V1, + deprecated=True, + ) + @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA) async def job_cancel(self, benchmark_id: str, job_id: str) -> None: """Cancel a job. @@ -134,7 +148,15 @@ class Eval(Protocol): """ ... - @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET") + @webmethod( + route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", + method="GET", + level=LLAMA_STACK_API_V1, + deprecated=True, + ) + @webmethod( + route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA + ) async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: """Get the result of a job. diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py index 124e4bc8e..7e45b55ee 100644 --- a/llama_stack/apis/files/files.py +++ b/llama_stack/apis/files/files.py @@ -11,6 +11,7 @@ from fastapi import File, Form, Response, UploadFile from pydantic import BaseModel, Field from llama_stack.apis.common.responses import Order +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod @@ -104,7 +105,7 @@ class OpenAIFileDeleteResponse(BaseModel): @trace_protocol class Files(Protocol): # OpenAI Files API Endpoints - @webmethod(route="/openai/v1/files", method="POST") + @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1) async def openai_upload_file( self, file: Annotated[UploadFile, File()], @@ -127,7 +128,7 @@ class Files(Protocol): """ ... - @webmethod(route="/openai/v1/files", method="GET") + @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1) async def openai_list_files( self, after: str | None = None, @@ -146,7 +147,7 @@ class Files(Protocol): """ ... - @webmethod(route="/openai/v1/files/{file_id}", method="GET") + @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1) async def openai_retrieve_file( self, file_id: str, @@ -159,7 +160,7 @@ class Files(Protocol): """ ... - @webmethod(route="/openai/v1/files/{file_id}", method="DELETE") + @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1) async def openai_delete_file( self, file_id: str, @@ -172,7 +173,7 @@ class Files(Protocol): """ ... - @webmethod(route="/openai/v1/files/{file_id}/content", method="GET") + @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1) async def openai_retrieve_file_content( self, file_id: str, diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index bd4737ca7..4f5332b5f 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -21,6 +21,7 @@ from llama_stack.apis.common.content_types import ContentDelta, InterleavedConte from llama_stack.apis.common.responses import Order from llama_stack.apis.models import Model from llama_stack.apis.telemetry import MetricResponseMixin +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.models.llama.datatypes import ( BuiltinTool, StopReason, @@ -1026,7 +1027,7 @@ class InferenceProvider(Protocol): model_store: ModelStore | None = None - @webmethod(route="/inference/completion", method="POST") + @webmethod(route="/inference/completion", method="POST", level=LLAMA_STACK_API_V1) async def completion( self, model_id: str, @@ -1049,7 +1050,7 @@ class InferenceProvider(Protocol): """ ... - @webmethod(route="/inference/batch-completion", method="POST", experimental=True) + @webmethod(route="/inference/batch-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1) async def batch_completion( self, model_id: str, @@ -1070,7 +1071,7 @@ class InferenceProvider(Protocol): raise NotImplementedError("Batch completion is not implemented") return # this is so mypy's safe-super rule will consider the method concrete - @webmethod(route="/inference/chat-completion", method="POST") + @webmethod(route="/inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1) async def chat_completion( self, model_id: str, @@ -1110,7 +1111,7 @@ class InferenceProvider(Protocol): """ ... - @webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True) + @webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1) async def batch_chat_completion( self, model_id: str, @@ -1135,7 +1136,7 @@ class InferenceProvider(Protocol): raise NotImplementedError("Batch chat completion is not implemented") return # this is so mypy's safe-super rule will consider the method concrete - @webmethod(route="/inference/embeddings", method="POST") + @webmethod(route="/inference/embeddings", method="POST", level=LLAMA_STACK_API_V1) async def embeddings( self, model_id: str, @@ -1155,7 +1156,7 @@ class InferenceProvider(Protocol): """ ... - @webmethod(route="/inference/rerank", method="POST", experimental=True) + @webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1) async def rerank( self, model: str, @@ -1174,7 +1175,7 @@ class InferenceProvider(Protocol): raise NotImplementedError("Reranking is not implemented") return # this is so mypy's safe-super rule will consider the method concrete - @webmethod(route="/openai/v1/completions", method="POST") + @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1) async def openai_completion( self, # Standard OpenAI completion parameters @@ -1225,7 +1226,7 @@ class InferenceProvider(Protocol): """ ... - @webmethod(route="/openai/v1/chat/completions", method="POST") + @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1) async def openai_chat_completion( self, model: str, @@ -1281,7 +1282,7 @@ class InferenceProvider(Protocol): """ ... - @webmethod(route="/openai/v1/embeddings", method="POST") + @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1) async def openai_embeddings( self, model: str, @@ -1310,7 +1311,7 @@ class Inference(InferenceProvider): - Embedding models: these models generate embeddings to be used for semantic search. """ - @webmethod(route="/openai/v1/chat/completions", method="GET") + @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1) async def list_chat_completions( self, after: str | None = None, @@ -1328,7 +1329,7 @@ class Inference(InferenceProvider): """ raise NotImplementedError("List chat completions is not implemented") - @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET") + @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1) async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages: """Describe a chat completion by its ID. diff --git a/llama_stack/apis/inspect/inspect.py b/llama_stack/apis/inspect/inspect.py index 91d9c3da7..e859dbe59 100644 --- a/llama_stack/apis/inspect/inspect.py +++ b/llama_stack/apis/inspect/inspect.py @@ -8,6 +8,7 @@ from typing import Protocol, runtime_checkable from pydantic import BaseModel +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.datatypes import HealthStatus from llama_stack.schema_utils import json_schema_type, webmethod @@ -57,7 +58,7 @@ class ListRoutesResponse(BaseModel): @runtime_checkable class Inspect(Protocol): - @webmethod(route="/inspect/routes", method="GET") + @webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1) async def list_routes(self) -> ListRoutesResponse: """List all available API routes with their methods and implementing providers. @@ -65,7 +66,7 @@ class Inspect(Protocol): """ ... - @webmethod(route="/health", method="GET") + @webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1) async def health(self) -> HealthInfo: """Get the current health status of the service. @@ -73,7 +74,7 @@ class Inspect(Protocol): """ ... - @webmethod(route="/version", method="GET") + @webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1) async def version(self) -> VersionInfo: """Get the version of the service. diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index 1af6fc9df..a4f6a888b 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -10,6 +10,7 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, ConfigDict, Field, field_validator from llama_stack.apis.resource import Resource, ResourceType +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod @@ -102,7 +103,7 @@ class OpenAIListModelsResponse(BaseModel): @runtime_checkable @trace_protocol class Models(Protocol): - @webmethod(route="/models", method="GET") + @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1) async def list_models(self) -> ListModelsResponse: """List all models. @@ -110,7 +111,7 @@ class Models(Protocol): """ ... - @webmethod(route="/openai/v1/models", method="GET") + @webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1) async def openai_list_models(self) -> OpenAIListModelsResponse: """List models using the OpenAI API. @@ -118,7 +119,7 @@ class Models(Protocol): """ ... - @webmethod(route="/models/{model_id:path}", method="GET") + @webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1) async def get_model( self, model_id: str, @@ -130,7 +131,7 @@ class Models(Protocol): """ ... - @webmethod(route="/models", method="POST") + @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1) async def register_model( self, model_id: str, @@ -150,7 +151,7 @@ class Models(Protocol): """ ... - @webmethod(route="/models/{model_id:path}", method="DELETE") + @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1) async def unregister_model( self, model_id: str, diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py index c16221289..30a51f765 100644 --- a/llama_stack/apis/post_training/post_training.py +++ b/llama_stack/apis/post_training/post_training.py @@ -13,6 +13,7 @@ from pydantic import BaseModel, Field from llama_stack.apis.common.content_types import URL from llama_stack.apis.common.job_types import JobStatus from llama_stack.apis.common.training_types import Checkpoint +from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA from llama_stack.schema_utils import json_schema_type, register_schema, webmethod @@ -283,7 +284,8 @@ class PostTrainingJobArtifactsResponse(BaseModel): class PostTraining(Protocol): - @webmethod(route="/post-training/supervised-fine-tune", method="POST") + @webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) + @webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA) async def supervised_fine_tune( self, job_uuid: str, @@ -310,7 +312,8 @@ class PostTraining(Protocol): """ ... - @webmethod(route="/post-training/preference-optimize", method="POST") + @webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) + @webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA) async def preference_optimize( self, job_uuid: str, @@ -332,7 +335,8 @@ class PostTraining(Protocol): """ ... - @webmethod(route="/post-training/jobs", method="GET") + @webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) + @webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA) async def get_training_jobs(self) -> ListPostTrainingJobsResponse: """Get all training jobs. @@ -340,7 +344,8 @@ class PostTraining(Protocol): """ ... - @webmethod(route="/post-training/job/status", method="GET") + @webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) + @webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA) async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse: """Get the status of a training job. @@ -349,7 +354,8 @@ class PostTraining(Protocol): """ ... - @webmethod(route="/post-training/job/cancel", method="POST") + @webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) + @webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA) async def cancel_training_job(self, job_uuid: str) -> None: """Cancel a training job. @@ -357,7 +363,8 @@ class PostTraining(Protocol): """ ... - @webmethod(route="/post-training/job/artifacts", method="GET") + @webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) + @webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA) async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse: """Get the artifacts of a training job. diff --git a/llama_stack/apis/prompts/prompts.py b/llama_stack/apis/prompts/prompts.py index e6a376c3f..c56185e25 100644 --- a/llama_stack/apis/prompts/prompts.py +++ b/llama_stack/apis/prompts/prompts.py @@ -10,6 +10,7 @@ from typing import Protocol, runtime_checkable from pydantic import BaseModel, Field, field_validator, model_validator +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod @@ -95,7 +96,7 @@ class ListPromptsResponse(BaseModel): class Prompts(Protocol): """Protocol for prompt management operations.""" - @webmethod(route="/prompts", method="GET") + @webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1) async def list_prompts(self) -> ListPromptsResponse: """List all prompts. @@ -103,7 +104,7 @@ class Prompts(Protocol): """ ... - @webmethod(route="/prompts/{prompt_id}/versions", method="GET") + @webmethod(route="/prompts/{prompt_id}/versions", method="GET", level=LLAMA_STACK_API_V1) async def list_prompt_versions( self, prompt_id: str, @@ -115,7 +116,7 @@ class Prompts(Protocol): """ ... - @webmethod(route="/prompts/{prompt_id}", method="GET") + @webmethod(route="/prompts/{prompt_id}", method="GET", level=LLAMA_STACK_API_V1) async def get_prompt( self, prompt_id: str, @@ -129,7 +130,7 @@ class Prompts(Protocol): """ ... - @webmethod(route="/prompts", method="POST") + @webmethod(route="/prompts", method="POST", level=LLAMA_STACK_API_V1) async def create_prompt( self, prompt: str, @@ -143,7 +144,7 @@ class Prompts(Protocol): """ ... - @webmethod(route="/prompts/{prompt_id}", method="PUT") + @webmethod(route="/prompts/{prompt_id}", method="PUT", level=LLAMA_STACK_API_V1) async def update_prompt( self, prompt_id: str, @@ -163,7 +164,7 @@ class Prompts(Protocol): """ ... - @webmethod(route="/prompts/{prompt_id}", method="DELETE") + @webmethod(route="/prompts/{prompt_id}", method="DELETE", level=LLAMA_STACK_API_V1) async def delete_prompt( self, prompt_id: str, @@ -174,7 +175,7 @@ class Prompts(Protocol): """ ... - @webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT") + @webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT", level=LLAMA_STACK_API_V1) async def set_default_version( self, prompt_id: str, diff --git a/llama_stack/apis/providers/providers.py b/llama_stack/apis/providers/providers.py index 8a1e93d8f..d1cff0f6c 100644 --- a/llama_stack/apis/providers/providers.py +++ b/llama_stack/apis/providers/providers.py @@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.datatypes import HealthResponse from llama_stack.schema_utils import json_schema_type, webmethod @@ -45,7 +46,7 @@ class Providers(Protocol): Providers API for inspecting, listing, and modifying providers and their configurations. """ - @webmethod(route="/providers", method="GET") + @webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1) async def list_providers(self) -> ListProvidersResponse: """List all available providers. @@ -53,7 +54,7 @@ class Providers(Protocol): """ ... - @webmethod(route="/providers/{provider_id}", method="GET") + @webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1) async def inspect_provider(self, provider_id: str) -> ProviderInfo: """Get detailed information about a specific provider. diff --git a/llama_stack/apis/safety/safety.py b/llama_stack/apis/safety/safety.py index 25ee03ec1..98367e9b0 100644 --- a/llama_stack/apis/safety/safety.py +++ b/llama_stack/apis/safety/safety.py @@ -11,6 +11,7 @@ from pydantic import BaseModel, Field from llama_stack.apis.inference import Message from llama_stack.apis.shields import Shield +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod @@ -97,7 +98,7 @@ class ShieldStore(Protocol): class Safety(Protocol): shield_store: ShieldStore - @webmethod(route="/safety/run-shield", method="POST") + @webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1) async def run_shield( self, shield_id: str, @@ -113,7 +114,7 @@ class Safety(Protocol): """ ... - @webmethod(route="/openai/v1/moderations", method="POST") + @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1) async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject: """Classifies if text and/or image inputs are potentially harmful. :param input: Input (or inputs) to classify. diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py index 8ca599b44..03d943e94 100644 --- a/llama_stack/apis/scoring/scoring.py +++ b/llama_stack/apis/scoring/scoring.py @@ -9,6 +9,7 @@ from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.schema_utils import json_schema_type, webmethod # mapping of metric to value @@ -61,7 +62,7 @@ class ScoringFunctionStore(Protocol): class Scoring(Protocol): scoring_function_store: ScoringFunctionStore - @webmethod(route="/scoring/score-batch", method="POST") + @webmethod(route="/scoring/score-batch", method="POST", level=LLAMA_STACK_API_V1) async def score_batch( self, dataset_id: str, @@ -77,7 +78,7 @@ class Scoring(Protocol): """ ... - @webmethod(route="/scoring/score", method="POST") + @webmethod(route="/scoring/score", method="POST", level=LLAMA_STACK_API_V1) async def score( self, input_rows: list[dict[str, Any]], diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/llama_stack/apis/scoring_functions/scoring_functions.py index 541067766..fe49723ab 100644 --- a/llama_stack/apis/scoring_functions/scoring_functions.py +++ b/llama_stack/apis/scoring_functions/scoring_functions.py @@ -18,6 +18,7 @@ from pydantic import BaseModel, Field from llama_stack.apis.common.type_system import ParamType from llama_stack.apis.resource import Resource, ResourceType +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod @@ -160,7 +161,7 @@ class ListScoringFunctionsResponse(BaseModel): @runtime_checkable class ScoringFunctions(Protocol): - @webmethod(route="/scoring-functions", method="GET") + @webmethod(route="/scoring-functions", method="GET", level=LLAMA_STACK_API_V1) async def list_scoring_functions(self) -> ListScoringFunctionsResponse: """List all scoring functions. @@ -168,7 +169,7 @@ class ScoringFunctions(Protocol): """ ... - @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET") + @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET", level=LLAMA_STACK_API_V1) async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn: """Get a scoring function by its ID. @@ -177,7 +178,7 @@ class ScoringFunctions(Protocol): """ ... - @webmethod(route="/scoring-functions", method="POST") + @webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1) async def register_scoring_function( self, scoring_fn_id: str, @@ -198,7 +199,7 @@ class ScoringFunctions(Protocol): """ ... - @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE") + @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1) async def unregister_scoring_function(self, scoring_fn_id: str) -> None: """Unregister a scoring function. diff --git a/llama_stack/apis/shields/shields.py b/llama_stack/apis/shields/shields.py index ec1b85349..5d967cf02 100644 --- a/llama_stack/apis/shields/shields.py +++ b/llama_stack/apis/shields/shields.py @@ -9,6 +9,7 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel from llama_stack.apis.resource import Resource, ResourceType +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod @@ -49,7 +50,7 @@ class ListShieldsResponse(BaseModel): @runtime_checkable @trace_protocol class Shields(Protocol): - @webmethod(route="/shields", method="GET") + @webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1) async def list_shields(self) -> ListShieldsResponse: """List all shields. @@ -57,7 +58,7 @@ class Shields(Protocol): """ ... - @webmethod(route="/shields/{identifier:path}", method="GET") + @webmethod(route="/shields/{identifier:path}", method="GET", level=LLAMA_STACK_API_V1) async def get_shield(self, identifier: str) -> Shield: """Get a shield by its identifier. @@ -66,7 +67,7 @@ class Shields(Protocol): """ ... - @webmethod(route="/shields", method="POST") + @webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1) async def register_shield( self, shield_id: str, @@ -84,7 +85,7 @@ class Shields(Protocol): """ ... - @webmethod(route="/shields/{identifier:path}", method="DELETE") + @webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1) async def unregister_shield(self, identifier: str) -> None: """Unregister a shield. diff --git a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py index a7af44b28..c13e2c17c 100644 --- a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +++ b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py @@ -10,6 +10,7 @@ from typing import Any, Protocol from pydantic import BaseModel from llama_stack.apis.inference import Message +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.schema_utils import json_schema_type, webmethod @@ -59,7 +60,7 @@ class SyntheticDataGenerationResponse(BaseModel): class SyntheticDataGeneration(Protocol): - @webmethod(route="/synthetic-data-generation/generate") + @webmethod(route="/synthetic-data-generation/generate", level=LLAMA_STACK_API_V1) def synthetic_data_generate( self, dialogs: list[Message], diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py index 8d1b5d697..29dd23989 100644 --- a/llama_stack/apis/telemetry/telemetry.py +++ b/llama_stack/apis/telemetry/telemetry.py @@ -16,6 +16,7 @@ from typing import ( from pydantic import BaseModel, Field +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.models.llama.datatypes import Primitive from llama_stack.schema_utils import json_schema_type, register_schema, webmethod @@ -412,7 +413,7 @@ class QueryMetricsResponse(BaseModel): @runtime_checkable class Telemetry(Protocol): - @webmethod(route="/telemetry/events", method="POST") + @webmethod(route="/telemetry/events", method="POST", level=LLAMA_STACK_API_V1) async def log_event( self, event: Event, @@ -425,7 +426,7 @@ class Telemetry(Protocol): """ ... - @webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE) + @webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1) async def query_traces( self, attribute_filters: list[QueryCondition] | None = None, @@ -443,7 +444,9 @@ class Telemetry(Protocol): """ ... - @webmethod(route="/telemetry/traces/{trace_id:path}", method="GET", required_scope=REQUIRED_SCOPE) + @webmethod( + route="/telemetry/traces/{trace_id:path}", method="GET", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1 + ) async def get_trace(self, trace_id: str) -> Trace: """Get a trace by its ID. @@ -453,7 +456,10 @@ class Telemetry(Protocol): ... @webmethod( - route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}", method="GET", required_scope=REQUIRED_SCOPE + route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}", + method="GET", + required_scope=REQUIRED_SCOPE, + level=LLAMA_STACK_API_V1, ) async def get_span(self, trace_id: str, span_id: str) -> Span: """Get a span by its ID. @@ -464,7 +470,12 @@ class Telemetry(Protocol): """ ... - @webmethod(route="/telemetry/spans/{span_id:path}/tree", method="POST", required_scope=REQUIRED_SCOPE) + @webmethod( + route="/telemetry/spans/{span_id:path}/tree", + method="POST", + required_scope=REQUIRED_SCOPE, + level=LLAMA_STACK_API_V1, + ) async def get_span_tree( self, span_id: str, @@ -480,7 +491,7 @@ class Telemetry(Protocol): """ ... - @webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE) + @webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1) async def query_spans( self, attribute_filters: list[QueryCondition], @@ -496,7 +507,7 @@ class Telemetry(Protocol): """ ... - @webmethod(route="/telemetry/spans/export", method="POST") + @webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1) async def save_spans_to_dataset( self, attribute_filters: list[QueryCondition], @@ -513,7 +524,9 @@ class Telemetry(Protocol): """ ... - @webmethod(route="/telemetry/metrics/{metric_name}", method="POST", required_scope=REQUIRED_SCOPE) + @webmethod( + route="/telemetry/metrics/{metric_name}", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1 + ) async def query_metrics( self, metric_name: str, diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py index 651016bd1..ed7847e23 100644 --- a/llama_stack/apis/tools/rag_tool.py +++ b/llama_stack/apis/tools/rag_tool.py @@ -11,6 +11,7 @@ from pydantic import BaseModel, Field, field_validator from typing_extensions import runtime_checkable from llama_stack.apis.common.content_types import URL, InterleavedContent +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, register_schema, webmethod @@ -185,7 +186,7 @@ class RAGQueryConfig(BaseModel): @runtime_checkable @trace_protocol class RAGToolRuntime(Protocol): - @webmethod(route="/tool-runtime/rag-tool/insert", method="POST") + @webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1) async def insert( self, documents: list[RAGDocument], @@ -200,7 +201,7 @@ class RAGToolRuntime(Protocol): """ ... - @webmethod(route="/tool-runtime/rag-tool/query", method="POST") + @webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1) async def query( self, content: InterleavedContent, diff --git a/llama_stack/apis/tools/tools.py b/llama_stack/apis/tools/tools.py index 52b86375a..12bd9efa2 100644 --- a/llama_stack/apis/tools/tools.py +++ b/llama_stack/apis/tools/tools.py @@ -12,6 +12,7 @@ from typing_extensions import runtime_checkable from llama_stack.apis.common.content_types import URL, InterleavedContent from llama_stack.apis.resource import Resource, ResourceType +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod @@ -151,7 +152,7 @@ class ListToolDefsResponse(BaseModel): @runtime_checkable @trace_protocol class ToolGroups(Protocol): - @webmethod(route="/toolgroups", method="POST") + @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1) async def register_tool_group( self, toolgroup_id: str, @@ -168,7 +169,7 @@ class ToolGroups(Protocol): """ ... - @webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET") + @webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1) async def get_tool_group( self, toolgroup_id: str, @@ -180,7 +181,7 @@ class ToolGroups(Protocol): """ ... - @webmethod(route="/toolgroups", method="GET") + @webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1) async def list_tool_groups(self) -> ListToolGroupsResponse: """List tool groups with optional provider. @@ -188,7 +189,7 @@ class ToolGroups(Protocol): """ ... - @webmethod(route="/tools", method="GET") + @webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1) async def list_tools(self, toolgroup_id: str | None = None) -> ListToolsResponse: """List tools with optional tool group. @@ -197,7 +198,7 @@ class ToolGroups(Protocol): """ ... - @webmethod(route="/tools/{tool_name:path}", method="GET") + @webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1) async def get_tool( self, tool_name: str, @@ -209,7 +210,7 @@ class ToolGroups(Protocol): """ ... - @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE") + @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1) async def unregister_toolgroup( self, toolgroup_id: str, @@ -238,7 +239,7 @@ class ToolRuntime(Protocol): rag_tool: RAGToolRuntime | None = None # TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed. - @webmethod(route="/tool-runtime/list-tools", method="GET") + @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1) async def list_runtime_tools( self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None ) -> ListToolDefsResponse: @@ -250,7 +251,7 @@ class ToolRuntime(Protocol): """ ... - @webmethod(route="/tool-runtime/invoke", method="POST") + @webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1) async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: """Run a tool with the given arguments. diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_dbs/vector_dbs.py index 47820fa0f..521d129c6 100644 --- a/llama_stack/apis/vector_dbs/vector_dbs.py +++ b/llama_stack/apis/vector_dbs/vector_dbs.py @@ -9,6 +9,7 @@ from typing import Literal, Protocol, runtime_checkable from pydantic import BaseModel from llama_stack.apis.resource import Resource, ResourceType +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod @@ -65,7 +66,7 @@ class ListVectorDBsResponse(BaseModel): @runtime_checkable @trace_protocol class VectorDBs(Protocol): - @webmethod(route="/vector-dbs", method="GET") + @webmethod(route="/vector-dbs", method="GET", level=LLAMA_STACK_API_V1) async def list_vector_dbs(self) -> ListVectorDBsResponse: """List all vector databases. @@ -73,7 +74,7 @@ class VectorDBs(Protocol): """ ... - @webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET") + @webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET", level=LLAMA_STACK_API_V1) async def get_vector_db( self, vector_db_id: str, @@ -85,7 +86,7 @@ class VectorDBs(Protocol): """ ... - @webmethod(route="/vector-dbs", method="POST") + @webmethod(route="/vector-dbs", method="POST", level=LLAMA_STACK_API_V1) async def register_vector_db( self, vector_db_id: str, @@ -107,7 +108,7 @@ class VectorDBs(Protocol): """ ... - @webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE") + @webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE", level=LLAMA_STACK_API_V1) async def unregister_vector_db(self, vector_db_id: str) -> None: """Unregister a vector database. diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index 3e8065cfb..2850863c4 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -15,6 +15,7 @@ from pydantic import BaseModel, Field from llama_stack.apis.inference import InterleavedContent from llama_stack.apis.vector_dbs import VectorDB +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id from llama_stack.schema_utils import json_schema_type, webmethod @@ -437,7 +438,7 @@ class VectorIO(Protocol): # this will just block now until chunks are inserted, but it should # probably return a Job instance which can be polled for completion - @webmethod(route="/vector-io/insert", method="POST") + @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1) async def insert_chunks( self, vector_db_id: str, @@ -455,7 +456,7 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/vector-io/query", method="POST") + @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1) async def query_chunks( self, vector_db_id: str, @@ -472,7 +473,7 @@ class VectorIO(Protocol): ... # OpenAI Vector Stores API endpoints - @webmethod(route="/openai/v1/vector_stores", method="POST") + @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1) async def openai_create_vector_store( self, name: str | None = None, @@ -498,7 +499,7 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores", method="GET") + @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1) async def openai_list_vector_stores( self, limit: int | None = 20, @@ -516,7 +517,7 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET") + @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1) async def openai_retrieve_vector_store( self, vector_store_id: str, @@ -528,7 +529,7 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST") + @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1) async def openai_update_vector_store( self, vector_store_id: str, @@ -546,7 +547,7 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE") + @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1) async def openai_delete_vector_store( self, vector_store_id: str, @@ -558,7 +559,7 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST") + @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1) async def openai_search_vector_store( self, vector_store_id: str, @@ -584,7 +585,7 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST") + @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1) async def openai_attach_file_to_vector_store( self, vector_store_id: str, @@ -602,7 +603,7 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET") + @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1) async def openai_list_files_in_vector_store( self, vector_store_id: str, @@ -624,7 +625,9 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET") + @webmethod( + route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1 + ) async def openai_retrieve_vector_store_file( self, vector_store_id: str, @@ -638,7 +641,11 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content", method="GET") + @webmethod( + route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content", + method="GET", + level=LLAMA_STACK_API_V1, + ) async def openai_retrieve_vector_store_file_contents( self, vector_store_id: str, @@ -652,7 +659,9 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST") + @webmethod( + route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1 + ) async def openai_update_vector_store_file( self, vector_store_id: str, @@ -668,7 +677,9 @@ class VectorIO(Protocol): """ ... - @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE") + @webmethod( + route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1 + ) async def openai_delete_vector_store_file( self, vector_store_id: str, diff --git a/llama_stack/apis/version.py b/llama_stack/apis/version.py index 53ad6a854..6af039b1f 100644 --- a/llama_stack/apis/version.py +++ b/llama_stack/apis/version.py @@ -4,4 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -LLAMA_STACK_API_VERSION = "v1" +LLAMA_STACK_API_V1 = "v1" +LLAMA_STACK_API_V1BETA = "v1beta" +LLAMA_STACK_API_V1ALPHA = "v1alpha" diff --git a/llama_stack/core/client.py b/llama_stack/core/client.py index 03e4fb051..49e01794e 100644 --- a/llama_stack/core/client.py +++ b/llama_stack/core/client.py @@ -15,7 +15,6 @@ import httpx from pydantic import BaseModel, parse_obj_as from termcolor import cprint -from llama_stack.apis.version import LLAMA_STACK_API_VERSION from llama_stack.providers.datatypes import RemoteProviderConfig _CLIENT_CLASSES = {} @@ -114,7 +113,24 @@ def create_api_client_class(protocol) -> type: break kwargs[param.name] = args[i] - url = f"{self.base_url}/{LLAMA_STACK_API_VERSION}/{webmethod.route.lstrip('/')}" + # Get all webmethods for this method (supports multiple decorators) + webmethods = getattr(method, "__webmethods__", []) + + if not webmethods: + raise RuntimeError(f"Method {method} has no webmethod decorators") + + # Choose the preferred webmethod (non-deprecated if available) + preferred_webmethod = None + for wm in webmethods: + if not getattr(wm, "deprecated", False): + preferred_webmethod = wm + break + + # If no non-deprecated found, use the first one + if preferred_webmethod is None: + preferred_webmethod = webmethods[0] + + url = f"{self.base_url}/{preferred_webmethod.level}/{preferred_webmethod.route.lstrip('/')}" def convert(value): if isinstance(value, list): diff --git a/llama_stack/core/server/routes.py b/llama_stack/core/server/routes.py index 7baf20da5..4970d0bf8 100644 --- a/llama_stack/core/server/routes.py +++ b/llama_stack/core/server/routes.py @@ -14,7 +14,6 @@ from starlette.routing import Route from llama_stack.apis.datatypes import Api, ExternalApiSpec from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup -from llama_stack.apis.version import LLAMA_STACK_API_VERSION from llama_stack.core.resolver import api_protocol_map from llama_stack.schema_utils import WebMethod @@ -54,22 +53,23 @@ def get_all_api_routes( protocol_methods.append((f"{tool_group.value}.{name}", method)) for name, method in protocol_methods: - if not hasattr(method, "__webmethod__"): + # Get all webmethods for this method (supports multiple decorators) + webmethods = getattr(method, "__webmethods__", []) + if not webmethods: continue - # The __webmethod__ attribute is dynamically added by the @webmethod decorator - # mypy doesn't know about this dynamic attribute, so we ignore the attr-defined error - webmethod = method.__webmethod__ # type: ignore[attr-defined] - path = f"/{LLAMA_STACK_API_VERSION}/{webmethod.route.lstrip('/')}" - if webmethod.method == hdrs.METH_GET: - http_method = hdrs.METH_GET - elif webmethod.method == hdrs.METH_DELETE: - http_method = hdrs.METH_DELETE - else: - http_method = hdrs.METH_POST - routes.append( - (Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod) - ) # setting endpoint to None since don't use a Router object + # Create routes for each webmethod decorator + for webmethod in webmethods: + path = f"/{webmethod.level}/{webmethod.route.lstrip('/')}" + if webmethod.method == hdrs.METH_GET: + http_method = hdrs.METH_GET + elif webmethod.method == hdrs.METH_DELETE: + http_method = hdrs.METH_DELETE + else: + http_method = hdrs.METH_POST + routes.append( + (Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod) + ) # setting endpoint to None since don't use a Router object apis[api] = routes diff --git a/llama_stack/core/server/tracing.py b/llama_stack/core/server/tracing.py index c48fc4d33..4c6df5b42 100644 --- a/llama_stack/core/server/tracing.py +++ b/llama_stack/core/server/tracing.py @@ -45,6 +45,14 @@ class TracingMiddleware: logger.debug(f"No matching route found for path: {path}, falling back to FastAPI") return await self.app(scope, receive, send) + # Log deprecation warning if route is deprecated + if getattr(webmethod, "deprecated", False): + logger.warning( + f"DEPRECATED ROUTE USED: {scope.get('method', 'GET')} {path} - " + f"This route is deprecated and may be removed in a future version. " + f"Please check the docs for the supported version." + ) + trace_attributes = {"__location__": "server", "raw_path": path} # Extract W3C trace context headers and store as trace attributes diff --git a/llama_stack/schema_utils.py b/llama_stack/schema_utils.py index 93382a881..4f8b4edff 100644 --- a/llama_stack/schema_utils.py +++ b/llama_stack/schema_utils.py @@ -13,6 +13,7 @@ from .strong_typing.schema import json_schema_type, register_schema # noqa: F40 @dataclass class WebMethod: + level: str | None = None route: str | None = None public: bool = False request_examples: list[Any] | None = None @@ -23,6 +24,7 @@ class WebMethod: descriptive_name: str | None = None experimental: bool | None = False required_scope: str | None = None + deprecated: bool | None = False T = TypeVar("T", bound=Callable[..., Any]) @@ -31,6 +33,7 @@ T = TypeVar("T", bound=Callable[..., Any]) def webmethod( route: str | None = None, method: str | None = None, + level: str | None = None, public: bool | None = False, request_examples: list[Any] | None = None, response_examples: list[Any] | None = None, @@ -38,6 +41,7 @@ def webmethod( descriptive_name: str | None = None, experimental: bool | None = False, required_scope: str | None = None, + deprecated: bool | None = False, ) -> Callable[[T], T]: """ Decorator that supplies additional metadata to an endpoint operation function. @@ -51,9 +55,10 @@ def webmethod( """ def wrap(func: T) -> T: - func.__webmethod__ = WebMethod( # type: ignore + webmethod_obj = WebMethod( route=route, method=method, + level=level, public=public or False, request_examples=request_examples, response_examples=response_examples, @@ -61,7 +66,16 @@ def webmethod( descriptive_name=descriptive_name, experimental=experimental, required_scope=required_scope, + deprecated=deprecated, ) + + # Store all webmethods in a list to support multiple decorators + if not hasattr(func, "__webmethods__"): + func.__webmethods__ = [] # type: ignore + func.__webmethods__.append(webmethod_obj) # type: ignore + + # Keep the last one as __webmethod__ for backwards compatibility + func.__webmethod__ = webmethod_obj # type: ignore return func return wrap diff --git a/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py b/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py index de1427bfd..e97a9d8fb 100644 --- a/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py +++ b/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py @@ -6,6 +6,7 @@ from typing import Protocol +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.datatypes import Api, ProviderSpec, RemoteProviderSpec from llama_stack.schema_utils import webmethod @@ -28,7 +29,7 @@ class WeatherProvider(Protocol): A protocol for the Weather API. """ - @webmethod(route="/weather/locations", method="GET") + @webmethod(route="/weather/locations", method="GET", level=LLAMA_STACK_API_V1) async def get_available_locations() -> dict[str, list[str]]: """ Get the available locations.