diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index c27bc6440..54031d839 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -16,7 +16,7 @@ import sys
 import fire
 import ruamel.yaml as yaml
 
-from llama_stack.apis.version import LLAMA_STACK_API_VERSION  # noqa: E402
+from llama_stack.apis.version import LLAMA_STACK_API_V1 # noqa: E402
 from llama_stack.core.stack import LlamaStack  # noqa: E402
 
 from .pyopenapi.options import Options  # noqa: E402
@@ -25,7 +25,7 @@ from .pyopenapi.utility import Specification, validate_api  # noqa: E402
 
 
 def str_presenter(dumper, data):
-    if data.startswith(f"/{LLAMA_STACK_API_VERSION}") or data.startswith(
+    if data.startswith(f"/{LLAMA_STACK_API_V1}") or data.startswith(
         "#/components/schemas/"
     ):
         style = None
@@ -58,7 +58,7 @@ def main(output_dir: str):
             server=Server(url="http://any-hosted-llama-stack.com"),
             info=Info(
                 title="Llama Stack Specification",
-                version=LLAMA_STACK_API_VERSION,
+                version=LLAMA_STACK_API_V1,
                 description="""This is the specification of the Llama Stack that provides
                 a set of endpoints and their corresponding interfaces that are tailored to
                 best leverage Llama Models.""",
diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 5ac712f02..cdbf1c60c 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -829,7 +829,7 @@ class Generator:
             else:
                 raise NotImplementedError(f"unknown HTTP method: {op.http_method}")
 
-            route = op.get_route()
+            route = op.get_route(op.webmethod)
             route = route.replace(":path", "")
             print(f"route: {route}")
             if route in paths:
diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py
index 045e33848..ce33d3bb9 100644
--- a/docs/openapi_generator/pyopenapi/operations.py
+++ b/docs/openapi_generator/pyopenapi/operations.py
@@ -11,7 +11,7 @@ import typing
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union
 
-from llama_stack.apis.version import LLAMA_STACK_API_VERSION
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA
 
 from termcolor import colored
 
@@ -113,11 +113,13 @@ class EndpointOperation:
     request_examples: Optional[List[Any]] = None
     response_examples: Optional[List[Any]] = None
 
-    def get_route(self) -> str:
-        if self.route is not None:
-            return "/".join(["", LLAMA_STACK_API_VERSION, self.route.lstrip("/")])
+    def get_route(self, webmethod) -> str:  # builds "/<webmethod.level>/<explicit route | operation name + path params>"
+        api_level = webmethod.level  # per-webmethod API level; used as the first path segment
 
-        route_parts = ["", LLAMA_STACK_API_VERSION, self.name]
+        if self.route is not None:
+            return "/".join(["", api_level, self.route.lstrip("/")])  # explicit route: drop its leading "/" so the join does not double it
+
+        route_parts = ["", api_level, self.name]  # no explicit route: leading "" makes the joined path start with "/"
         for param_name, _ in self.path_params:
             route_parts.append("{" + param_name + "}")
         return "/".join(route_parts)
@@ -152,33 +154,39 @@ def _get_endpoint_functions(
 
     functions = inspect.getmembers(endpoint, inspect.isfunction)
     for func_name, func_ref in functions:
-        webmethod = getattr(func_ref, "__webmethod__", None)
-        if not webmethod:
+        webmethods = []
+
+        # Check for multiple webmethods (stacked decorators)
+        if hasattr(func_ref, "__webmethods__"):
+            webmethods = func_ref.__webmethods__
+
+        if not webmethods:
             continue
 
-        print(f"Processing {colored(func_name, 'white')}...")
-        operation_name = func_name
-        
-        if webmethod.method == "GET":
-            prefix = "get"
-        elif webmethod.method == "DELETE":
-            prefix = "delete"
-        elif webmethod.method == "POST":
-            prefix = "post"
-        elif operation_name.startswith("get_") or operation_name.endswith("/get"):
-            prefix = "get"
-        elif (
-            operation_name.startswith("delete_")
-            or operation_name.startswith("remove_")
-            or operation_name.endswith("/delete")
-            or operation_name.endswith("/remove")
-        ):
-            prefix = "delete"
-        else:
-            # by default everything else is a POST
-            prefix = "post"
+        for webmethod in webmethods:
+            print(f"Processing {colored(func_name, 'white')}...")
+            operation_name = func_name
+            
+            if webmethod.method == "GET":
+                prefix = "get"
+            elif webmethod.method == "DELETE":
+                prefix = "delete"
+            elif webmethod.method == "POST":
+                prefix = "post"
+            elif operation_name.startswith("get_") or operation_name.endswith("/get"):
+                prefix = "get"
+            elif (
+                operation_name.startswith("delete_")
+                or operation_name.startswith("remove_")
+                or operation_name.endswith("/delete")
+                or operation_name.endswith("/remove")
+            ):
+                prefix = "delete"
+            else:
+                # by default everything else is a POST
+                prefix = "post"
 
-        yield prefix, operation_name, func_name, func_ref
+            yield prefix, operation_name, func_name, func_ref
 
 
 def _get_defining_class(member_fn: str, derived_cls: type) -> type:
@@ -239,105 +247,101 @@ def get_endpoint_operations(
             "update",
         ],
     ):
-        # extract routing information from function metadata
-        webmethod = getattr(func_ref, "__webmethod__", None)
-        if webmethod is not None:
+        # Get all webmethods for this function
+        webmethods = getattr(func_ref, "__webmethods__", [])
+
+        # Create one EndpointOperation for each webmethod
+        for webmethod in webmethods:
             route = webmethod.route
             route_params = _get_route_parameters(route) if route is not None else None
             public = webmethod.public
             request_examples = webmethod.request_examples
             response_examples = webmethod.response_examples
-        else:
-            route = None
-            route_params = None
-            public = False
-            request_examples = None
-            response_examples = None
 
-        # inspect function signature for path and query parameters, and request/response payload type
-        signature = get_signature(func_ref)
+            # inspect function signature for path and query parameters, and request/response payload type
+            signature = get_signature(func_ref)
 
-        path_params = []
-        query_params = []
-        request_params = []
-        multipart_params = []
+            path_params = []
+            query_params = []
+            request_params = []
+            multipart_params = []
 
-        for param_name, parameter in signature.parameters.items():
-            param_type = _get_annotation_type(parameter.annotation, func_ref)
+            for param_name, parameter in signature.parameters.items():
+                param_type = _get_annotation_type(parameter.annotation, func_ref)
 
-            # omit "self" for instance methods
-            if param_name == "self" and param_type is inspect.Parameter.empty:
-                continue
+                # omit "self" for instance methods
+                if param_name == "self" and param_type is inspect.Parameter.empty:
+                    continue
 
-            # check if all parameters have explicit type
-            if parameter.annotation is inspect.Parameter.empty:
+                # check if all parameters have explicit type
+                if parameter.annotation is inspect.Parameter.empty:
+                    raise ValidationError(
+                        f"parameter '{param_name}' in function '{func_name}' has no type annotation"
+                    )
+
+                is_multipart = _is_multipart_param(param_type)
+
+                if prefix in ["get", "delete"]:
+                    if route_params is not None and param_name in route_params:
+                        path_params.append((param_name, param_type))
+                    else:
+                        query_params.append((param_name, param_type))
+                else:
+                    if route_params is not None and param_name in route_params:
+                        path_params.append((param_name, param_type))
+                    elif is_multipart:
+                        multipart_params.append((param_name, param_type))
+                    else:
+                        request_params.append((param_name, param_type))
+
+            # check if function has explicit return type
+            if signature.return_annotation is inspect.Signature.empty:
                 raise ValidationError(
-                    f"parameter '{param_name}' in function '{func_name}' has no type annotation"
+                    f"function '{func_name}' has no return type annotation"
                 )
 
-            is_multipart = _is_multipart_param(param_type)
-            
-            if prefix in ["get", "delete"]:
-                if route_params is not None and param_name in route_params:
-                    path_params.append((param_name, param_type))
-                else:
-                    query_params.append((param_name, param_type))
+            return_type = _get_annotation_type(signature.return_annotation, func_ref)
+
+            # operations that produce events are labeled as Generator[YieldType, SendType, ReturnType]
+            # where YieldType is the event type, SendType is None, and ReturnType is the immediate response type to the request
+            if typing.get_origin(return_type) is collections.abc.Generator:
+                event_type, send_type, response_type = typing.get_args(return_type)
+                if send_type is not type(None):
+                    raise ValidationError(
+                        f"function '{func_name}' has a return type Generator[Y,S,R] and therefore looks like an event but has an explicit send type"
+                    )
             else:
-                if route_params is not None and param_name in route_params:
-                    path_params.append((param_name, param_type))
-                elif is_multipart:
-                    multipart_params.append((param_name, param_type))
+                event_type = None
+
+                def process_type(t):  # maps a return annotation to the type documented as the response payload
+                    if typing.get_origin(t) is collections.abc.AsyncIterator:
+                        # NOTE(ashwin): this is SSE and there is no way to represent it. either we make it a List
+                        # or the item type. I am choosing it to be the latter
+                        args = typing.get_args(t)
+                        return args[0]
+                    elif typing.get_origin(t) is typing.Union:
+                        types = [process_type(a) for a in typing.get_args(t)]
+                        return typing.Union[tuple(types)]  # public API instead of private typing._UnionGenericAlias; also collapses duplicate members
+                    else:
+                        return t
+
+                response_type = process_type(return_type)
+
+                if prefix in ["delete", "remove"]:
+                    http_method = HTTPMethod.DELETE
+                elif prefix == "post":
+                    http_method = HTTPMethod.POST
+                elif prefix == "get":
+                    http_method = HTTPMethod.GET
+                elif prefix == "set":
+                    http_method = HTTPMethod.PUT
+                elif prefix == "update":
+                    http_method = HTTPMethod.PATCH
                 else:
-                    request_params.append((param_name, param_type))
+                    raise ValidationError(f"unknown prefix {prefix}")
 
-        # check if function has explicit return type
-        if signature.return_annotation is inspect.Signature.empty:
-            raise ValidationError(
-                f"function '{func_name}' has no return type annotation"
-            )
-
-        return_type = _get_annotation_type(signature.return_annotation, func_ref)
-
-        # operations that produce events are labeled as Generator[YieldType, SendType, ReturnType]
-        # where YieldType is the event type, SendType is None, and ReturnType is the immediate response type to the request
-        if typing.get_origin(return_type) is collections.abc.Generator:
-            event_type, send_type, response_type = typing.get_args(return_type)
-            if send_type is not type(None):
-                raise ValidationError(
-                    f"function '{func_name}' has a return type Generator[Y,S,R] and therefore looks like an event but has an explicit send type"
-                )
-        else:
-            event_type = None
-
-            def process_type(t):
-                if typing.get_origin(t) is collections.abc.AsyncIterator:
-                    # NOTE(ashwin): this is SSE and there is no way to represent it. either we make it a List
-                    # or the item type. I am choosing it to be the latter
-                    args = typing.get_args(t)
-                    return args[0]
-                elif typing.get_origin(t) is typing.Union:
-                    types = [process_type(a) for a in typing.get_args(t)]
-                    return typing._UnionGenericAlias(typing.Union, tuple(types))
-                else:
-                    return t
-
-            response_type = process_type(return_type)
-
-            if prefix in ["delete", "remove"]:
-                http_method = HTTPMethod.DELETE
-            elif prefix == "post":
-                http_method = HTTPMethod.POST
-            elif prefix == "get":
-                http_method = HTTPMethod.GET
-            elif prefix == "set":
-                http_method = HTTPMethod.PUT
-            elif prefix == "update":
-                http_method = HTTPMethod.PATCH
-            else:
-                raise ValidationError(f"unknown prefix {prefix}")
-
-        result.append(
-            EndpointOperation(
+            # Create an EndpointOperation for this specific webmethod
+            operation = EndpointOperation(
                 defining_class=_get_defining_class(func_name, endpoint),
                 name=operation_name,
                 func_name=func_name,
@@ -354,7 +358,10 @@ def get_endpoint_operations(
                 request_examples=request_examples if use_examples else None,
                 response_examples=response_examples if use_examples else None,
             )
-        )
+
+            # Store the specific webmethod with this operation
+            operation.webmethod = webmethod
+            result.append(operation)
 
     if not result:
         raise ValidationError(f"no eligible endpoint operations in type {endpoint}")
diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html
index 36cb025c4..5a61221c1 100644
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@@ -175,6 +175,43 @@
                 }
             }
         },
+        "/v1alpha/post-training/job/cancel": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "PostTraining (Coming Soon)"
+                ],
+                "summary": "Cancel a training job.",
+                "description": "Cancel a training job.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/CancelTrainingJobRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/v1/post-training/job/cancel": {
             "post": {
                 "responses": {
@@ -1179,6 +1216,60 @@
                 }
             }
         },
+        "/v1alpha/eval/benchmarks/{benchmark_id}/evaluations": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "EvaluateResponse object containing generations and scores.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/EvaluateResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Eval"
+                ],
+                "summary": "Evaluate a list of rows on a benchmark.",
+                "description": "Evaluate a list of rows on a benchmark.",
+                "parameters": [
+                    {
+                        "name": "benchmark_id",
+                        "in": "path",
+                        "description": "The ID of the benchmark to run the evaluation on.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/EvaluateRowsRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/v1/eval/benchmarks/{benchmark_id}/evaluations": {
             "post": {
                 "responses": {
@@ -1366,6 +1457,85 @@
                 ]
             }
         },
+        "/v1alpha/eval/benchmarks/{benchmark_id}": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A Benchmark.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Benchmark"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Benchmarks"
+                ],
+                "summary": "Get a benchmark by its ID.",
+                "description": "Get a benchmark by its ID.",
+                "parameters": [
+                    {
+                        "name": "benchmark_id",
+                        "in": "path",
+                        "description": "The ID of the benchmark to get.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            },
+            "delete": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Benchmarks"
+                ],
+                "summary": "Unregister a benchmark.",
+                "description": "Unregister a benchmark.",
+                "parameters": [
+                    {
+                        "name": "benchmark_id",
+                        "in": "path",
+                        "description": "The ID of the benchmark to unregister.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
         "/v1/eval/benchmarks/{benchmark_id}": {
             "get": {
                 "responses": {
@@ -2079,6 +2249,50 @@
                 ]
             }
         },
+        "/v1alpha/post-training/job/artifacts": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A PostTrainingJobArtifactsResponse.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "PostTraining (Coming Soon)"
+                ],
+                "summary": "Get the artifacts of a training job.",
+                "description": "Get the artifacts of a training job.",
+                "parameters": [
+                    {
+                        "name": "job_uuid",
+                        "in": "query",
+                        "description": "The UUID of the job to get the artifacts of.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
         "/v1/post-training/job/artifacts": {
             "get": {
                 "responses": {
@@ -2123,6 +2337,50 @@
                 ]
             }
         },
+        "/v1alpha/post-training/job/status": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A PostTrainingJobStatusResponse.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/PostTrainingJobStatusResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "PostTraining (Coming Soon)"
+                ],
+                "summary": "Get the status of a training job.",
+                "description": "Get the status of a training job.",
+                "parameters": [
+                    {
+                        "name": "job_uuid",
+                        "in": "query",
+                        "description": "The UUID of the job to get the status of.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
         "/v1/post-training/job/status": {
             "get": {
                 "responses": {
@@ -2167,6 +2425,40 @@
                 ]
             }
         },
+        "/v1alpha/post-training/jobs": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A ListPostTrainingJobsResponse.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ListPostTrainingJobsResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "PostTraining (Coming Soon)"
+                ],
+                "summary": "Get all training jobs.",
+                "description": "Get all training jobs.",
+                "parameters": []
+            }
+        },
         "/v1/post-training/jobs": {
             "get": {
                 "responses": {
@@ -2538,6 +2830,103 @@
                 ]
             }
         },
+        "/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "The status of the evaluation job.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Job"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Eval"
+                ],
+                "summary": "Get the status of a job.",
+                "description": "Get the status of a job.",
+                "parameters": [
+                    {
+                        "name": "benchmark_id",
+                        "in": "path",
+                        "description": "The ID of the benchmark to run the evaluation on.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "job_id",
+                        "in": "path",
+                        "description": "The ID of the job to get the status of.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            },
+            "delete": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Eval"
+                ],
+                "summary": "Cancel a job.",
+                "description": "Cancel a job.",
+                "parameters": [
+                    {
+                        "name": "benchmark_id",
+                        "in": "path",
+                        "description": "The ID of the benchmark to run the evaluation on.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "job_id",
+                        "in": "path",
+                        "description": "The ID of the job to cancel.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
         "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
             "get": {
                 "responses": {
@@ -2635,6 +3024,59 @@
                 ]
             }
         },
+        "/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "The result of the job.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/EvaluateResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Eval"
+                ],
+                "summary": "Get the result of a job.",
+                "description": "Get the result of a job.",
+                "parameters": [
+                    {
+                        "name": "benchmark_id",
+                        "in": "path",
+                        "description": "The ID of the benchmark to run the evaluation on.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "job_id",
+                        "in": "path",
+                        "description": "The ID of the job to get the result of.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
         "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": {
             "get": {
                 "responses": {
@@ -2750,6 +3192,75 @@
                 ]
             }
         },
+        "/v1alpha/eval/benchmarks": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A ListBenchmarksResponse.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ListBenchmarksResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Benchmarks"
+                ],
+                "summary": "List all benchmarks.",
+                "description": "List all benchmarks.",
+                "parameters": []
+            },
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Benchmarks"
+                ],
+                "summary": "Register a benchmark.",
+                "description": "Register a benchmark.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/RegisterBenchmarkRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/v1/eval/benchmarks": {
             "get": {
                 "responses": {
@@ -4783,6 +5294,50 @@
                 }
             }
         },
+        "/v1alpha/post-training/preference-optimize": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "A PostTrainingJob.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/PostTrainingJob"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "PostTraining (Coming Soon)"
+                ],
+                "summary": "Run preference optimization of a model.",
+                "description": "Run preference optimization of a model.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/PreferenceOptimizeRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/v1/post-training/preference-optimize": {
             "post": {
                 "responses": {
@@ -5178,6 +5733,60 @@
                 }
             }
         },
+        "/v1alpha/eval/benchmarks/{benchmark_id}/jobs": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "The job that was created to run the evaluation.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Job"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Eval"
+                ],
+                "summary": "Run an evaluation on a benchmark.",
+                "description": "Run an evaluation on a benchmark.",
+                "parameters": [
+                    {
+                        "name": "benchmark_id",
+                        "in": "path",
+                        "description": "The ID of the benchmark to run the evaluation on.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/RunEvalRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/v1/eval/benchmarks/{benchmark_id}/jobs": {
             "post": {
                 "responses": {
@@ -5499,6 +6108,50 @@
                 }
             }
         },
+        "/v1alpha/post-training/supervised-fine-tune": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "A PostTrainingJob.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/PostTrainingJob"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "PostTraining (Coming Soon)"
+                ],
+                "summary": "Run supervised fine-tuning of a model.",
+                "description": "Run supervised fine-tuning of a model.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/SupervisedFineTuneRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/v1/post-training/supervised-fine-tune": {
             "post": {
                 "responses": {
diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index c4b82b630..9c0558658 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -109,6 +109,32 @@ paths:
             schema:
               $ref: '#/components/schemas/BatchCompletionRequest'
         required: true
+  /v1alpha/post-training/job/cancel:
+    post:
+      responses:
+        '200':
+          description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - PostTraining (Coming Soon)
+      summary: Cancel a training job.
+      description: Cancel a training job.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CancelTrainingJobRequest'
+        required: true
   /v1/post-training/job/cancel:
     post:
       responses:
@@ -832,6 +858,44 @@ paths:
             schema:
               $ref: '#/components/schemas/EmbeddingsRequest'
         required: true
+  /v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
+    post:
+      responses:
+        '200':
+          description: >-
+            EvaluateResponse object containing generations and scores.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/EvaluateResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Eval
+      summary: Evaluate a list of rows on a benchmark.
+      description: Evaluate a list of rows on a benchmark.
+      parameters:
+        - name: benchmark_id
+          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
+          required: true
+          schema:
+            type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/EvaluateRowsRequest'
+        required: true
   /v1/eval/benchmarks/{benchmark_id}/evaluations:
     post:
       responses:
@@ -962,6 +1026,61 @@ paths:
           required: true
           schema:
             type: string
+  /v1alpha/eval/benchmarks/{benchmark_id}:
+    get:
+      responses:
+        '200':
+          description: A Benchmark.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Benchmark'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Benchmarks
+      summary: Get a benchmark by its ID.
+      description: Get a benchmark by its ID.
+      parameters:
+        - name: benchmark_id
+          in: path
+          description: The ID of the benchmark to get.
+          required: true
+          schema:
+            type: string
+    delete:
+      responses:
+        '200':
+          description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Benchmarks
+      summary: Unregister a benchmark.
+      description: Unregister a benchmark.
+      parameters:
+        - name: benchmark_id
+          in: path
+          description: The ID of the benchmark to unregister.
+          required: true
+          schema:
+            type: string
   /v1/eval/benchmarks/{benchmark_id}:
     get:
       responses:
@@ -1458,6 +1577,37 @@ paths:
           required: true
           schema:
             type: string
+  /v1alpha/post-training/job/artifacts:
+    get:
+      responses:
+        '200':
+          description: A PostTrainingJobArtifactsResponse.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - PostTraining (Coming Soon)
+      summary: Get the artifacts of a training job.
+      description: Get the artifacts of a training job.
+      parameters:
+        - name: job_uuid
+          in: query
+          description: >-
+            The UUID of the job to get the artifacts of.
+          required: true
+          schema:
+            type: string
   /v1/post-training/job/artifacts:
     get:
       responses:
@@ -1489,6 +1639,37 @@ paths:
           required: true
           schema:
             type: string
+  /v1alpha/post-training/job/status:
+    get:
+      responses:
+        '200':
+          description: A PostTrainingJobStatusResponse.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/PostTrainingJobStatusResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - PostTraining (Coming Soon)
+      summary: Get the status of a training job.
+      description: Get the status of a training job.
+      parameters:
+        - name: job_uuid
+          in: query
+          description: >-
+            The UUID of the job to get the status of.
+          required: true
+          schema:
+            type: string
   /v1/post-training/job/status:
     get:
       responses:
@@ -1520,6 +1701,30 @@ paths:
           required: true
           schema:
             type: string
+  /v1alpha/post-training/jobs:
+    get:
+      responses:
+        '200':
+          description: A ListPostTrainingJobsResponse.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListPostTrainingJobsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - PostTraining (Coming Soon)
+      summary: Get all training jobs.
+      description: Get all training jobs.
+      parameters: []
   /v1/post-training/jobs:
     get:
       responses:
@@ -1804,6 +2009,75 @@ paths:
           required: false
           schema:
             type: integer
+  /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
+    get:
+      responses:
+        '200':
+          description: The status of the evaluation job.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Job'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Eval
+      summary: Get the status of a job.
+      description: Get the status of a job.
+      parameters:
+        - name: benchmark_id
+          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
+          required: true
+          schema:
+            type: string
+        - name: job_id
+          in: path
+          description: The ID of the job to get the status of.
+          required: true
+          schema:
+            type: string
+    delete:
+      responses:
+        '200':
+          description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Eval
+      summary: Cancel a job.
+      description: Cancel a job.
+      parameters:
+        - name: benchmark_id
+          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
+          required: true
+          schema:
+            type: string
+        - name: job_id
+          in: path
+          description: The ID of the job to cancel.
+          required: true
+          schema:
+            type: string
   /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
     get:
       responses:
@@ -1873,6 +2147,43 @@ paths:
           required: true
           schema:
             type: string
+  /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result:
+    get:
+      responses:
+        '200':
+          description: The result of the job.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/EvaluateResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Eval
+      summary: Get the result of a job.
+      description: Get the result of a job.
+      parameters:
+        - name: benchmark_id
+          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
+          required: true
+          schema:
+            type: string
+        - name: job_id
+          in: path
+          description: The ID of the job to get the result of.
+          required: true
+          schema:
+            type: string
   /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result:
     get:
       responses:
@@ -1953,6 +2264,55 @@ paths:
           required: false
           schema:
             type: integer
+  /v1alpha/eval/benchmarks:
+    get:
+      responses:
+        '200':
+          description: A ListBenchmarksResponse.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListBenchmarksResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Benchmarks
+      summary: List all benchmarks.
+      description: List all benchmarks.
+      parameters: []
+    post:
+      responses:
+        '200':
+          description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Benchmarks
+      summary: Register a benchmark.
+      description: Register a benchmark.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/RegisterBenchmarkRequest'
+        required: true
   /v1/eval/benchmarks:
     get:
       responses:
@@ -3454,6 +3814,36 @@ paths:
             schema:
               $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest'
         required: true
+  /v1alpha/post-training/preference-optimize:
+    post:
+      responses:
+        '200':
+          description: A PostTrainingJob.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/PostTrainingJob'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - PostTraining (Coming Soon)
+      summary: Run preference optimization of a model.
+      description: Run preference optimization of a model.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/PreferenceOptimizeRequest'
+        required: true
   /v1/post-training/preference-optimize:
     post:
       responses:
@@ -3735,6 +4125,44 @@ paths:
             schema:
               $ref: '#/components/schemas/ResumeAgentTurnRequest'
         required: true
+  /v1alpha/eval/benchmarks/{benchmark_id}/jobs:
+    post:
+      responses:
+        '200':
+          description: >-
+            The job that was created to run the evaluation.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Job'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Eval
+      summary: Run an evaluation on a benchmark.
+      description: Run an evaluation on a benchmark.
+      parameters:
+        - name: benchmark_id
+          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
+          required: true
+          schema:
+            type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/RunEvalRequest'
+        required: true
   /v1/eval/benchmarks/{benchmark_id}/jobs:
     post:
       responses:
@@ -3961,6 +4389,36 @@ paths:
             schema:
               $ref: '#/components/schemas/SetDefaultVersionRequest'
         required: true
+  /v1alpha/post-training/supervised-fine-tune:
+    post:
+      responses:
+        '200':
+          description: A PostTrainingJob.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/PostTrainingJob'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - PostTraining (Coming Soon)
+      summary: Run supervised fine-tuning of a model.
+      description: Run supervised fine-tuning of a model.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/SupervisedFineTuneRequest'
+        required: true
   /v1/post-training/supervised-fine-tune:
     post:
       responses:
diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py
index 7dd3e9289..e53ca82e2 100644
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -27,6 +27,7 @@ from llama_stack.apis.inference import (
 )
 from llama_stack.apis.safety import SafetyViolation
 from llama_stack.apis.tools import ToolDef
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 from .openai_responses import (
@@ -481,7 +482,7 @@ class Agents(Protocol):
     - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
     """
 
-    @webmethod(route="/agents", method="POST", descriptive_name="create_agent")
+    @webmethod(route="/agents", method="POST", descriptive_name="create_agent", level=LLAMA_STACK_API_V1)
     async def create_agent(
         self,
         agent_config: AgentConfig,
@@ -494,7 +495,10 @@ class Agents(Protocol):
         ...
 
     @webmethod(
-        route="/agents/{agent_id}/session/{session_id}/turn", method="POST", descriptive_name="create_agent_turn"
+        route="/agents/{agent_id}/session/{session_id}/turn",
+        method="POST",
+        descriptive_name="create_agent_turn",
+        level=LLAMA_STACK_API_V1,
     )
     async def create_agent_turn(
         self,
@@ -524,6 +528,7 @@ class Agents(Protocol):
         route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
         method="POST",
         descriptive_name="resume_agent_turn",
+        level=LLAMA_STACK_API_V1,
     )
     async def resume_agent_turn(
         self,
@@ -549,6 +554,7 @@ class Agents(Protocol):
     @webmethod(
         route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
         method="GET",
+        level=LLAMA_STACK_API_V1,
     )
     async def get_agents_turn(
         self,
@@ -568,6 +574,7 @@ class Agents(Protocol):
     @webmethod(
         route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
         method="GET",
+        level=LLAMA_STACK_API_V1,
     )
     async def get_agents_step(
         self,
@@ -586,7 +593,12 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/agents/{agent_id}/session", method="POST", descriptive_name="create_agent_session")
+    @webmethod(
+        route="/agents/{agent_id}/session",
+        method="POST",
+        descriptive_name="create_agent_session",
+        level=LLAMA_STACK_API_V1,
+    )
     async def create_agent_session(
         self,
         agent_id: str,
@@ -600,7 +612,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET")
+    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_agents_session(
         self,
         session_id: str,
@@ -616,7 +628,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE")
+    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_agents_session(
         self,
         session_id: str,
@@ -629,7 +641,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/agents/{agent_id}", method="DELETE")
+    @webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_agent(
         self,
         agent_id: str,
@@ -640,7 +652,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/agents", method="GET")
+    @webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1)
     async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
         """List all agents.
 
@@ -650,7 +662,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/agents/{agent_id}", method="GET")
+    @webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_agent(self, agent_id: str) -> Agent:
         """Describe an agent by its ID.
 
@@ -659,7 +671,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/agents/{agent_id}/sessions", method="GET")
+    @webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_agent_sessions(
         self,
         agent_id: str,
@@ -682,7 +694,7 @@ class Agents(Protocol):
     #
     # Both of these APIs are inherently stateful.
 
-    @webmethod(route="/openai/v1/responses/{response_id}", method="GET")
+    @webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_openai_response(
         self,
         response_id: str,
@@ -694,7 +706,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/responses", method="POST")
+    @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1)
     async def create_openai_response(
         self,
         input: str | list[OpenAIResponseInput],
@@ -719,7 +731,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/responses", method="GET")
+    @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1)
     async def list_openai_responses(
         self,
         after: str | None = None,
@@ -737,7 +749,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET")
+    @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
     async def list_openai_response_input_items(
         self,
         response_id: str,
@@ -759,7 +771,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE")
+    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
         """Delete an OpenAI response by its ID.
 
diff --git a/llama_stack/apis/batch_inference/batch_inference.py b/llama_stack/apis/batch_inference/batch_inference.py
index b2aa637e2..43ade0221 100644
--- a/llama_stack/apis/batch_inference/batch_inference.py
+++ b/llama_stack/apis/batch_inference/batch_inference.py
@@ -17,6 +17,7 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import webmethod
 
 
@@ -30,7 +31,7 @@ class BatchInference(Protocol):
     including (post-training, evals, etc).
     """
 
-    @webmethod(route="/batch-inference/completion", method="POST")
+    @webmethod(route="/batch-inference/completion", method="POST", level=LLAMA_STACK_API_V1)
     async def completion(
         self,
         model: str,
@@ -50,7 +51,7 @@ class BatchInference(Protocol):
         """
         ...
 
-    @webmethod(route="/batch-inference/chat-completion", method="POST")
+    @webmethod(route="/batch-inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
     async def chat_completion(
         self,
         model: str,
diff --git a/llama_stack/apis/batches/batches.py b/llama_stack/apis/batches/batches.py
index c6bbd92eb..5890cbe04 100644
--- a/llama_stack/apis/batches/batches.py
+++ b/llama_stack/apis/batches/batches.py
@@ -8,6 +8,7 @@ from typing import Literal, Protocol, runtime_checkable
 
 from pydantic import BaseModel, Field
 
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, webmethod
 
 try:
@@ -42,7 +43,7 @@ class Batches(Protocol):
     Note: This API is currently under active development and may undergo changes.
     """
 
-    @webmethod(route="/openai/v1/batches", method="POST")
+    @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1)
     async def create_batch(
         self,
         input_file_id: str,
@@ -62,7 +63,7 @@ class Batches(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET")
+    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def retrieve_batch(self, batch_id: str) -> BatchObject:
         """Retrieve information about a specific batch.
 
@@ -71,7 +72,7 @@ class Batches(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST")
+    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
     async def cancel_batch(self, batch_id: str) -> BatchObject:
         """Cancel a batch that is in progress.
 
@@ -80,7 +81,7 @@ class Batches(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/batches", method="GET")
+    @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1)
     async def list_batches(
         self,
         after: str | None = None,
diff --git a/llama_stack/apis/benchmarks/benchmarks.py b/llama_stack/apis/benchmarks/benchmarks.py
index 8d0a25e7b..d87d45a60 100644
--- a/llama_stack/apis/benchmarks/benchmarks.py
+++ b/llama_stack/apis/benchmarks/benchmarks.py
@@ -8,6 +8,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, Field
 
 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack.schema_utils import json_schema_type, webmethod
 
 
@@ -53,7 +54,8 @@ class ListBenchmarksResponse(BaseModel):
 
 @runtime_checkable
 class Benchmarks(Protocol):
-    @webmethod(route="/eval/benchmarks", method="GET")
+    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def list_benchmarks(self) -> ListBenchmarksResponse:
         """List all benchmarks.
 
@@ -61,7 +63,8 @@ class Benchmarks(Protocol):
         """
         ...
 
-    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET")
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_benchmark(
         self,
         benchmark_id: str,
@@ -73,7 +76,8 @@ class Benchmarks(Protocol):
         """
         ...
 
-    @webmethod(route="/eval/benchmarks", method="POST")
+    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def register_benchmark(
         self,
         benchmark_id: str,
@@ -94,7 +98,8 @@ class Benchmarks(Protocol):
         """
         ...
 
-    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE")
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
     async def unregister_benchmark(self, benchmark_id: str) -> None:
         """Unregister a benchmark.
 
diff --git a/llama_stack/apis/datasetio/datasetio.py b/llama_stack/apis/datasetio/datasetio.py
index 1183983cc..27e5336bc 100644
--- a/llama_stack/apis/datasetio/datasetio.py
+++ b/llama_stack/apis/datasetio/datasetio.py
@@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable
 
 from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.apis.datasets import Dataset
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import webmethod
 
 
@@ -20,7 +21,7 @@ class DatasetIO(Protocol):
     # keeping for aligning with inference/safety, but this is not used
     dataset_store: DatasetStore
 
-    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET")
+    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def iterrows(
         self,
         dataset_id: str,
@@ -44,7 +45,7 @@ class DatasetIO(Protocol):
         """
         ...
 
-    @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST")
+    @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1)
     async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
         """Append rows to a dataset.
 
diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py
index f347e0e29..be0cbf09a 100644
--- a/llama_stack/apis/datasets/datasets.py
+++ b/llama_stack/apis/datasets/datasets.py
@@ -10,6 +10,7 @@ from typing import Annotated, Any, Literal, Protocol
 from pydantic import BaseModel, Field
 
 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 
@@ -145,7 +146,7 @@ class ListDatasetsResponse(BaseModel):
 
 
 class Datasets(Protocol):
-    @webmethod(route="/datasets", method="POST")
+    @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1)
     async def register_dataset(
         self,
         purpose: DatasetPurpose,
@@ -214,7 +215,7 @@ class Datasets(Protocol):
         """
         ...
 
-    @webmethod(route="/datasets/{dataset_id:path}", method="GET")
+    @webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_dataset(
         self,
         dataset_id: str,
@@ -226,7 +227,7 @@ class Datasets(Protocol):
         """
         ...
 
-    @webmethod(route="/datasets", method="GET")
+    @webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1)
     async def list_datasets(self) -> ListDatasetsResponse:
         """List all datasets.
 
@@ -234,7 +235,7 @@ class Datasets(Protocol):
         """
         ...
 
-    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE")
+    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def unregister_dataset(
         self,
         dataset_id: str,
diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py
index 83a0a8e56..bb81778f1 100644
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@@ -13,6 +13,7 @@ from llama_stack.apis.common.job_types import Job
 from llama_stack.apis.inference import SamplingParams, SystemMessage
 from llama_stack.apis.scoring import ScoringResult
 from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 
@@ -83,7 +84,8 @@ class EvaluateResponse(BaseModel):
 class Eval(Protocol):
     """Llama Stack Evaluation API for running evaluations on model and agent candidates."""
 
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def run_eval(
         self,
         benchmark_id: str,
@@ -97,7 +99,10 @@ class Eval(Protocol):
         """
         ...
 
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
+    )
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def evaluate_rows(
         self,
         benchmark_id: str,
@@ -115,7 +120,10 @@ class Eval(Protocol):
         """
         ...
 
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
+    )
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def job_status(self, benchmark_id: str, job_id: str) -> Job:
         """Get the status of a job.
 
@@ -125,7 +133,13 @@ class Eval(Protocol):
         """
         ...
 
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+        method="DELETE",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
     async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
         """Cancel a job.
 
@@ -134,7 +148,15 @@ class Eval(Protocol):
         """
         ...
 
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA
+    )
     async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
         """Get the result of a job.
 
diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py
index 124e4bc8e..7e45b55ee 100644
--- a/llama_stack/apis/files/files.py
+++ b/llama_stack/apis/files/files.py
@@ -11,6 +11,7 @@ from fastapi import File, Form, Response, UploadFile
 from pydantic import BaseModel, Field
 
 from llama_stack.apis.common.responses import Order
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -104,7 +105,7 @@ class OpenAIFileDeleteResponse(BaseModel):
 @trace_protocol
 class Files(Protocol):
     # OpenAI Files API Endpoints
-    @webmethod(route="/openai/v1/files", method="POST")
+    @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_upload_file(
         self,
         file: Annotated[UploadFile, File()],
@@ -127,7 +128,7 @@ class Files(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/files", method="GET")
+    @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_files(
         self,
         after: str | None = None,
@@ -146,7 +147,7 @@ class Files(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/files/{file_id}", method="GET")
+    @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_file(
         self,
         file_id: str,
@@ -159,7 +160,7 @@ class Files(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE")
+    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def openai_delete_file(
         self,
         file_id: str,
@@ -172,7 +173,7 @@ class Files(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET")
+    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_file_content(
         self,
         file_id: str,
diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py
index bd4737ca7..4f5332b5f 100644
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -21,6 +21,7 @@ from llama_stack.apis.common.content_types import ContentDelta, InterleavedConte
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.models import Model
 from llama_stack.apis.telemetry import MetricResponseMixin
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.models.llama.datatypes import (
     BuiltinTool,
     StopReason,
@@ -1026,7 +1027,7 @@ class InferenceProvider(Protocol):
 
     model_store: ModelStore | None = None
 
-    @webmethod(route="/inference/completion", method="POST")
+    @webmethod(route="/inference/completion", method="POST", level=LLAMA_STACK_API_V1)
     async def completion(
         self,
         model_id: str,
@@ -1049,7 +1050,7 @@ class InferenceProvider(Protocol):
         """
         ...
 
-    @webmethod(route="/inference/batch-completion", method="POST", experimental=True)
+    @webmethod(route="/inference/batch-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
     async def batch_completion(
         self,
         model_id: str,
@@ -1070,7 +1071,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Batch completion is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete
 
-    @webmethod(route="/inference/chat-completion", method="POST")
+    @webmethod(route="/inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
     async def chat_completion(
         self,
         model_id: str,
@@ -1110,7 +1111,7 @@ class InferenceProvider(Protocol):
         """
         ...
 
-    @webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True)
+    @webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
     async def batch_chat_completion(
         self,
         model_id: str,
@@ -1135,7 +1136,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Batch chat completion is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete
 
-    @webmethod(route="/inference/embeddings", method="POST")
+    @webmethod(route="/inference/embeddings", method="POST", level=LLAMA_STACK_API_V1)
     async def embeddings(
         self,
         model_id: str,
@@ -1155,7 +1156,7 @@ class InferenceProvider(Protocol):
         """
         ...
 
-    @webmethod(route="/inference/rerank", method="POST", experimental=True)
+    @webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
     async def rerank(
         self,
         model: str,
@@ -1174,7 +1175,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Reranking is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete
 
-    @webmethod(route="/openai/v1/completions", method="POST")
+    @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_completion(
         self,
         # Standard OpenAI completion parameters
@@ -1225,7 +1226,7 @@ class InferenceProvider(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/chat/completions", method="POST")
+    @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_chat_completion(
         self,
         model: str,
@@ -1281,7 +1282,7 @@ class InferenceProvider(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/embeddings", method="POST")
+    @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_embeddings(
         self,
         model: str,
@@ -1310,7 +1311,7 @@ class Inference(InferenceProvider):
     - Embedding models: these models generate embeddings to be used for semantic search.
     """
 
-    @webmethod(route="/openai/v1/chat/completions", method="GET")
+    @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_chat_completions(
         self,
         after: str | None = None,
@@ -1328,7 +1329,7 @@ class Inference(InferenceProvider):
         """
         raise NotImplementedError("List chat completions is not implemented")
 
-    @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET")
+    @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
         """Describe a chat completion by its ID.
 
diff --git a/llama_stack/apis/inspect/inspect.py b/llama_stack/apis/inspect/inspect.py
index 91d9c3da7..e859dbe59 100644
--- a/llama_stack/apis/inspect/inspect.py
+++ b/llama_stack/apis/inspect/inspect.py
@@ -8,6 +8,7 @@ from typing import Protocol, runtime_checkable
 
 from pydantic import BaseModel
 
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.datatypes import HealthStatus
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -57,7 +58,7 @@ class ListRoutesResponse(BaseModel):
 
 @runtime_checkable
 class Inspect(Protocol):
-    @webmethod(route="/inspect/routes", method="GET")
+    @webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
     async def list_routes(self) -> ListRoutesResponse:
         """List all available API routes with their methods and implementing providers.
 
@@ -65,7 +66,7 @@ class Inspect(Protocol):
         """
         ...
 
-    @webmethod(route="/health", method="GET")
+    @webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1)
     async def health(self) -> HealthInfo:
         """Get the current health status of the service.
 
@@ -73,7 +74,7 @@ class Inspect(Protocol):
         """
         ...
 
-    @webmethod(route="/version", method="GET")
+    @webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1)
     async def version(self) -> VersionInfo:
         """Get the version of the service.
 
diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py
index 1af6fc9df..a4f6a888b 100644
--- a/llama_stack/apis/models/models.py
+++ b/llama_stack/apis/models/models.py
@@ -10,6 +10,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, ConfigDict, Field, field_validator
 
 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -102,7 +103,7 @@ class OpenAIListModelsResponse(BaseModel):
 @runtime_checkable
 @trace_protocol
 class Models(Protocol):
-    @webmethod(route="/models", method="GET")
+    @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
     async def list_models(self) -> ListModelsResponse:
         """List all models.
 
@@ -110,7 +111,7 @@ class Models(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/models", method="GET")
+    @webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_models(self) -> OpenAIListModelsResponse:
         """List models using the OpenAI API.
 
@@ -118,7 +119,7 @@ class Models(Protocol):
         """
         ...
 
-    @webmethod(route="/models/{model_id:path}", method="GET")
+    @webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_model(
         self,
         model_id: str,
@@ -130,7 +131,7 @@ class Models(Protocol):
         """
         ...
 
-    @webmethod(route="/models", method="POST")
+    @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1)
     async def register_model(
         self,
         model_id: str,
@@ -150,7 +151,7 @@ class Models(Protocol):
         """
         ...
 
-    @webmethod(route="/models/{model_id:path}", method="DELETE")
+    @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def unregister_model(
         self,
         model_id: str,
diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py
index c16221289..30a51f765 100644
--- a/llama_stack/apis/post_training/post_training.py
+++ b/llama_stack/apis/post_training/post_training.py
@@ -13,6 +13,7 @@ from pydantic import BaseModel, Field
 from llama_stack.apis.common.content_types import URL
 from llama_stack.apis.common.job_types import JobStatus
 from llama_stack.apis.common.training_types import Checkpoint
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 
@@ -283,7 +284,8 @@ class PostTrainingJobArtifactsResponse(BaseModel):
 
 
 class PostTraining(Protocol):
-    @webmethod(route="/post-training/supervised-fine-tune", method="POST")
+    @webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def supervised_fine_tune(
         self,
         job_uuid: str,
@@ -310,7 +312,8 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/preference-optimize", method="POST")
+    @webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def preference_optimize(
         self,
         job_uuid: str,
@@ -332,7 +335,8 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/jobs", method="GET")
+    @webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
         """Get all training jobs.
 
@@ -340,7 +344,8 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/job/status", method="GET")
+    @webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse:
         """Get the status of a training job.
 
@@ -349,7 +354,8 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/job/cancel", method="POST")
+    @webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def cancel_training_job(self, job_uuid: str) -> None:
         """Cancel a training job.
 
@@ -357,7 +363,8 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/job/artifacts", method="GET")
+    @webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
         """Get the artifacts of a training job.
 
diff --git a/llama_stack/apis/prompts/prompts.py b/llama_stack/apis/prompts/prompts.py
index e6a376c3f..c56185e25 100644
--- a/llama_stack/apis/prompts/prompts.py
+++ b/llama_stack/apis/prompts/prompts.py
@@ -10,6 +10,7 @@ from typing import Protocol, runtime_checkable
 
 from pydantic import BaseModel, Field, field_validator, model_validator
 
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -95,7 +96,7 @@ class ListPromptsResponse(BaseModel):
 class Prompts(Protocol):
     """Protocol for prompt management operations."""
 
-    @webmethod(route="/prompts", method="GET")
+    @webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1)
     async def list_prompts(self) -> ListPromptsResponse:
         """List all prompts.
 
@@ -103,7 +104,7 @@ class Prompts(Protocol):
         """
         ...
 
-    @webmethod(route="/prompts/{prompt_id}/versions", method="GET")
+    @webmethod(route="/prompts/{prompt_id}/versions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_prompt_versions(
         self,
         prompt_id: str,
@@ -115,7 +116,7 @@ class Prompts(Protocol):
         """
         ...
 
-    @webmethod(route="/prompts/{prompt_id}", method="GET")
+    @webmethod(route="/prompts/{prompt_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_prompt(
         self,
         prompt_id: str,
@@ -129,7 +130,7 @@ class Prompts(Protocol):
         """
         ...
 
-    @webmethod(route="/prompts", method="POST")
+    @webmethod(route="/prompts", method="POST", level=LLAMA_STACK_API_V1)
     async def create_prompt(
         self,
         prompt: str,
@@ -143,7 +144,7 @@ class Prompts(Protocol):
         """
         ...
 
-    @webmethod(route="/prompts/{prompt_id}", method="PUT")
+    @webmethod(route="/prompts/{prompt_id}", method="PUT", level=LLAMA_STACK_API_V1)
     async def update_prompt(
         self,
         prompt_id: str,
@@ -163,7 +164,7 @@ class Prompts(Protocol):
         """
         ...
 
-    @webmethod(route="/prompts/{prompt_id}", method="DELETE")
+    @webmethod(route="/prompts/{prompt_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_prompt(
         self,
         prompt_id: str,
@@ -174,7 +175,7 @@ class Prompts(Protocol):
         """
         ...
 
-    @webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT")
+    @webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT", level=LLAMA_STACK_API_V1)
     async def set_default_version(
         self,
         prompt_id: str,
diff --git a/llama_stack/apis/providers/providers.py b/llama_stack/apis/providers/providers.py
index 8a1e93d8f..d1cff0f6c 100644
--- a/llama_stack/apis/providers/providers.py
+++ b/llama_stack/apis/providers/providers.py
@@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable
 
 from pydantic import BaseModel
 
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.datatypes import HealthResponse
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -45,7 +46,7 @@ class Providers(Protocol):
     Providers API for inspecting, listing, and modifying providers and their configurations.
     """
 
-    @webmethod(route="/providers", method="GET")
+    @webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1)
     async def list_providers(self) -> ListProvidersResponse:
         """List all available providers.
 
@@ -53,7 +54,7 @@ class Providers(Protocol):
         """
         ...
 
-    @webmethod(route="/providers/{provider_id}", method="GET")
+    @webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def inspect_provider(self, provider_id: str) -> ProviderInfo:
         """Get detailed information about a specific provider.
 
diff --git a/llama_stack/apis/safety/safety.py b/llama_stack/apis/safety/safety.py
index 25ee03ec1..98367e9b0 100644
--- a/llama_stack/apis/safety/safety.py
+++ b/llama_stack/apis/safety/safety.py
@@ -11,6 +11,7 @@ from pydantic import BaseModel, Field
 
 from llama_stack.apis.inference import Message
 from llama_stack.apis.shields import Shield
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -97,7 +98,7 @@ class ShieldStore(Protocol):
 class Safety(Protocol):
     shield_store: ShieldStore
 
-    @webmethod(route="/safety/run-shield", method="POST")
+    @webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1)
     async def run_shield(
         self,
         shield_id: str,
@@ -113,7 +114,7 @@ class Safety(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/moderations", method="POST")
+    @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1)
     async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
         """Classifies if text and/or image inputs are potentially harmful.
         :param input: Input (or inputs) to classify.
diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py
index 8ca599b44..03d943e94 100644
--- a/llama_stack/apis/scoring/scoring.py
+++ b/llama_stack/apis/scoring/scoring.py
@@ -9,6 +9,7 @@ from typing import Any, Protocol, runtime_checkable
 from pydantic import BaseModel
 
 from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, webmethod
 
 # mapping of metric to value
@@ -61,7 +62,7 @@ class ScoringFunctionStore(Protocol):
 class Scoring(Protocol):
     scoring_function_store: ScoringFunctionStore
 
-    @webmethod(route="/scoring/score-batch", method="POST")
+    @webmethod(route="/scoring/score-batch", method="POST", level=LLAMA_STACK_API_V1)
     async def score_batch(
         self,
         dataset_id: str,
@@ -77,7 +78,7 @@ class Scoring(Protocol):
         """
         ...
 
-    @webmethod(route="/scoring/score", method="POST")
+    @webmethod(route="/scoring/score", method="POST", level=LLAMA_STACK_API_V1)
     async def score(
         self,
         input_rows: list[dict[str, Any]],
diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/llama_stack/apis/scoring_functions/scoring_functions.py
index 541067766..fe49723ab 100644
--- a/llama_stack/apis/scoring_functions/scoring_functions.py
+++ b/llama_stack/apis/scoring_functions/scoring_functions.py
@@ -18,6 +18,7 @@ from pydantic import BaseModel, Field
 
 from llama_stack.apis.common.type_system import ParamType
 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 
@@ -160,7 +161,7 @@ class ListScoringFunctionsResponse(BaseModel):
 
 @runtime_checkable
 class ScoringFunctions(Protocol):
-    @webmethod(route="/scoring-functions", method="GET")
+    @webmethod(route="/scoring-functions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_scoring_functions(self) -> ListScoringFunctionsResponse:
         """List all scoring functions.
 
@@ -168,7 +169,7 @@ class ScoringFunctions(Protocol):
         """
         ...
 
-    @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET")
+    @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn:
         """Get a scoring function by its ID.
 
@@ -177,7 +178,7 @@ class ScoringFunctions(Protocol):
         """
         ...
 
-    @webmethod(route="/scoring-functions", method="POST")
+    @webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1)
     async def register_scoring_function(
         self,
         scoring_fn_id: str,
@@ -198,7 +199,7 @@ class ScoringFunctions(Protocol):
         """
         ...
 
-    @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE")
+    @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
         """Unregister a scoring function.
 
diff --git a/llama_stack/apis/shields/shields.py b/llama_stack/apis/shields/shields.py
index ec1b85349..5d967cf02 100644
--- a/llama_stack/apis/shields/shields.py
+++ b/llama_stack/apis/shields/shields.py
@@ -9,6 +9,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel
 
 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -49,7 +50,7 @@ class ListShieldsResponse(BaseModel):
 @runtime_checkable
 @trace_protocol
 class Shields(Protocol):
-    @webmethod(route="/shields", method="GET")
+    @webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1)
     async def list_shields(self) -> ListShieldsResponse:
         """List all shields.
 
@@ -57,7 +58,7 @@ class Shields(Protocol):
         """
         ...
 
-    @webmethod(route="/shields/{identifier:path}", method="GET")
+    @webmethod(route="/shields/{identifier:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_shield(self, identifier: str) -> Shield:
         """Get a shield by its identifier.
 
@@ -66,7 +67,7 @@ class Shields(Protocol):
         """
         ...
 
-    @webmethod(route="/shields", method="POST")
+    @webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1)
     async def register_shield(
         self,
         shield_id: str,
@@ -84,7 +85,7 @@ class Shields(Protocol):
         """
         ...
 
-    @webmethod(route="/shields/{identifier:path}", method="DELETE")
+    @webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def unregister_shield(self, identifier: str) -> None:
         """Unregister a shield.
 
diff --git a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
index a7af44b28..c13e2c17c 100644
--- a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
+++ b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
@@ -10,6 +10,7 @@ from typing import Any, Protocol
 from pydantic import BaseModel
 
 from llama_stack.apis.inference import Message
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, webmethod
 
 
@@ -59,7 +60,7 @@ class SyntheticDataGenerationResponse(BaseModel):
 
 
 class SyntheticDataGeneration(Protocol):
-    @webmethod(route="/synthetic-data-generation/generate")
+    @webmethod(route="/synthetic-data-generation/generate", level=LLAMA_STACK_API_V1)
     def synthetic_data_generate(
         self,
         dialogs: list[Message],
diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py
index 8d1b5d697..29dd23989 100644
--- a/llama_stack/apis/telemetry/telemetry.py
+++ b/llama_stack/apis/telemetry/telemetry.py
@@ -16,6 +16,7 @@ from typing import (
 
 from pydantic import BaseModel, Field
 
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.models.llama.datatypes import Primitive
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
@@ -412,7 +413,7 @@ class QueryMetricsResponse(BaseModel):
 
 @runtime_checkable
 class Telemetry(Protocol):
-    @webmethod(route="/telemetry/events", method="POST")
+    @webmethod(route="/telemetry/events", method="POST", level=LLAMA_STACK_API_V1)
     async def log_event(
         self,
         event: Event,
@@ -425,7 +426,7 @@ class Telemetry(Protocol):
         """
         ...
 
-    @webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE)
+    @webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1)
     async def query_traces(
         self,
         attribute_filters: list[QueryCondition] | None = None,
@@ -443,7 +444,9 @@ class Telemetry(Protocol):
         """
         ...
 
-    @webmethod(route="/telemetry/traces/{trace_id:path}", method="GET", required_scope=REQUIRED_SCOPE)
+    @webmethod(
+        route="/telemetry/traces/{trace_id:path}", method="GET", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1
+    )
     async def get_trace(self, trace_id: str) -> Trace:
         """Get a trace by its ID.
 
@@ -453,7 +456,10 @@ class Telemetry(Protocol):
         ...
 
     @webmethod(
-        route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}", method="GET", required_scope=REQUIRED_SCOPE
+        route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
+        method="GET",
+        required_scope=REQUIRED_SCOPE,
+        level=LLAMA_STACK_API_V1,
     )
     async def get_span(self, trace_id: str, span_id: str) -> Span:
         """Get a span by its ID.
@@ -464,7 +470,12 @@ class Telemetry(Protocol):
         """
         ...
 
-    @webmethod(route="/telemetry/spans/{span_id:path}/tree", method="POST", required_scope=REQUIRED_SCOPE)
+    @webmethod(
+        route="/telemetry/spans/{span_id:path}/tree",
+        method="POST",
+        required_scope=REQUIRED_SCOPE,
+        level=LLAMA_STACK_API_V1,
+    )
     async def get_span_tree(
         self,
         span_id: str,
@@ -480,7 +491,7 @@ class Telemetry(Protocol):
         """
         ...
 
-    @webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE)
+    @webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1)
     async def query_spans(
         self,
         attribute_filters: list[QueryCondition],
@@ -496,7 +507,7 @@ class Telemetry(Protocol):
         """
         ...
 
-    @webmethod(route="/telemetry/spans/export", method="POST")
+    @webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1)
     async def save_spans_to_dataset(
         self,
         attribute_filters: list[QueryCondition],
@@ -513,7 +524,9 @@ class Telemetry(Protocol):
         """
         ...
 
-    @webmethod(route="/telemetry/metrics/{metric_name}", method="POST", required_scope=REQUIRED_SCOPE)
+    @webmethod(
+        route="/telemetry/metrics/{metric_name}", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1
+    )
     async def query_metrics(
         self,
         metric_name: str,
diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py
index 651016bd1..ed7847e23 100644
--- a/llama_stack/apis/tools/rag_tool.py
+++ b/llama_stack/apis/tools/rag_tool.py
@@ -11,6 +11,7 @@ from pydantic import BaseModel, Field, field_validator
 from typing_extensions import runtime_checkable
 
 from llama_stack.apis.common.content_types import URL, InterleavedContent
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
@@ -185,7 +186,7 @@ class RAGQueryConfig(BaseModel):
 @runtime_checkable
 @trace_protocol
 class RAGToolRuntime(Protocol):
-    @webmethod(route="/tool-runtime/rag-tool/insert", method="POST")
+    @webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1)
     async def insert(
         self,
         documents: list[RAGDocument],
@@ -200,7 +201,7 @@ class RAGToolRuntime(Protocol):
         """
         ...
 
-    @webmethod(route="/tool-runtime/rag-tool/query", method="POST")
+    @webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1)
     async def query(
         self,
         content: InterleavedContent,
diff --git a/llama_stack/apis/tools/tools.py b/llama_stack/apis/tools/tools.py
index 52b86375a..12bd9efa2 100644
--- a/llama_stack/apis/tools/tools.py
+++ b/llama_stack/apis/tools/tools.py
@@ -12,6 +12,7 @@ from typing_extensions import runtime_checkable
 
 from llama_stack.apis.common.content_types import URL, InterleavedContent
 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -151,7 +152,7 @@ class ListToolDefsResponse(BaseModel):
 @runtime_checkable
 @trace_protocol
 class ToolGroups(Protocol):
-    @webmethod(route="/toolgroups", method="POST")
+    @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1)
     async def register_tool_group(
         self,
         toolgroup_id: str,
@@ -168,7 +169,7 @@ class ToolGroups(Protocol):
         """
         ...
 
-    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET")
+    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_tool_group(
         self,
         toolgroup_id: str,
@@ -180,7 +181,7 @@ class ToolGroups(Protocol):
         """
         ...
 
-    @webmethod(route="/toolgroups", method="GET")
+    @webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1)
     async def list_tool_groups(self) -> ListToolGroupsResponse:
         """List tool groups with optional provider.
 
@@ -188,7 +189,7 @@ class ToolGroups(Protocol):
         """
         ...
 
-    @webmethod(route="/tools", method="GET")
+    @webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1)
     async def list_tools(self, toolgroup_id: str | None = None) -> ListToolsResponse:
         """List tools with optional tool group.
 
@@ -197,7 +198,7 @@ class ToolGroups(Protocol):
         """
         ...
 
-    @webmethod(route="/tools/{tool_name:path}", method="GET")
+    @webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_tool(
         self,
         tool_name: str,
@@ -209,7 +210,7 @@ class ToolGroups(Protocol):
         """
         ...
 
-    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE")
+    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def unregister_toolgroup(
         self,
         toolgroup_id: str,
@@ -238,7 +239,7 @@ class ToolRuntime(Protocol):
     rag_tool: RAGToolRuntime | None = None
 
     # TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed.
-    @webmethod(route="/tool-runtime/list-tools", method="GET")
+    @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1)
     async def list_runtime_tools(
         self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
     ) -> ListToolDefsResponse:
@@ -250,7 +251,7 @@ class ToolRuntime(Protocol):
         """
         ...
 
-    @webmethod(route="/tool-runtime/invoke", method="POST")
+    @webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1)
     async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
         """Run a tool with the given arguments.
 
diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_dbs/vector_dbs.py
index 47820fa0f..521d129c6 100644
--- a/llama_stack/apis/vector_dbs/vector_dbs.py
+++ b/llama_stack/apis/vector_dbs/vector_dbs.py
@@ -9,6 +9,7 @@ from typing import Literal, Protocol, runtime_checkable
 from pydantic import BaseModel
 
 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -65,7 +66,7 @@ class ListVectorDBsResponse(BaseModel):
 @runtime_checkable
 @trace_protocol
 class VectorDBs(Protocol):
-    @webmethod(route="/vector-dbs", method="GET")
+    @webmethod(route="/vector-dbs", method="GET", level=LLAMA_STACK_API_V1)
     async def list_vector_dbs(self) -> ListVectorDBsResponse:
         """List all vector databases.
 
@@ -73,7 +74,7 @@ class VectorDBs(Protocol):
         """
         ...
 
-    @webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET")
+    @webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_vector_db(
         self,
         vector_db_id: str,
@@ -85,7 +86,7 @@ class VectorDBs(Protocol):
         """
         ...
 
-    @webmethod(route="/vector-dbs", method="POST")
+    @webmethod(route="/vector-dbs", method="POST", level=LLAMA_STACK_API_V1)
     async def register_vector_db(
         self,
         vector_db_id: str,
@@ -107,7 +108,7 @@ class VectorDBs(Protocol):
         """
         ...
 
-    @webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE")
+    @webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def unregister_vector_db(self, vector_db_id: str) -> None:
         """Unregister a vector database.
 
diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py
index 3e8065cfb..2850863c4 100644
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@@ -15,6 +15,7 @@ from pydantic import BaseModel, Field
 
 from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.vector_dbs import VectorDB
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
 from llama_stack.schema_utils import json_schema_type, webmethod
@@ -437,7 +438,7 @@ class VectorIO(Protocol):
 
     # this will just block now until chunks are inserted, but it should
     # probably return a Job instance which can be polled for completion
-    @webmethod(route="/vector-io/insert", method="POST")
+    @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
     async def insert_chunks(
         self,
         vector_db_id: str,
@@ -455,7 +456,7 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/vector-io/query", method="POST")
+    @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
     async def query_chunks(
         self,
         vector_db_id: str,
@@ -472,7 +473,7 @@ class VectorIO(Protocol):
         ...
 
     # OpenAI Vector Stores API endpoints
-    @webmethod(route="/openai/v1/vector_stores", method="POST")
+    @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_create_vector_store(
         self,
         name: str | None = None,
@@ -498,7 +499,7 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores", method="GET")
+    @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_vector_stores(
         self,
         limit: int | None = 20,
@@ -516,7 +517,7 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET")
+    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_vector_store(
         self,
         vector_store_id: str,
@@ -528,7 +529,7 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST")
+    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_update_vector_store(
         self,
         vector_store_id: str,
@@ -546,7 +547,7 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE")
+    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def openai_delete_vector_store(
         self,
         vector_store_id: str,
@@ -558,7 +559,7 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST")
+    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_search_vector_store(
         self,
         vector_store_id: str,
@@ -584,7 +585,7 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST")
+    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_attach_file_to_vector_store(
         self,
         vector_store_id: str,
@@ -602,7 +603,7 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET")
+    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_files_in_vector_store(
         self,
         vector_store_id: str,
@@ -624,7 +625,9 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET")
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1
+    )
     async def openai_retrieve_vector_store_file(
         self,
         vector_store_id: str,
@@ -638,7 +641,11 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content", method="GET")
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+    )
     async def openai_retrieve_vector_store_file_contents(
         self,
         vector_store_id: str,
@@ -652,7 +659,9 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST")
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1
+    )
     async def openai_update_vector_store_file(
         self,
         vector_store_id: str,
@@ -668,7 +677,9 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE")
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1
+    )
     async def openai_delete_vector_store_file(
         self,
         vector_store_id: str,
diff --git a/llama_stack/apis/version.py b/llama_stack/apis/version.py
index 53ad6a854..6af039b1f 100644
--- a/llama_stack/apis/version.py
+++ b/llama_stack/apis/version.py
@@ -4,4 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-LLAMA_STACK_API_VERSION = "v1"
+# API levels. A webmethod's `level` is used as the first path segment of its URL.
+LLAMA_STACK_API_V1 = "v1"
+LLAMA_STACK_API_V1BETA = "v1beta"
+LLAMA_STACK_API_V1ALPHA = "v1alpha"
+
+# Deprecated alias of LLAMA_STACK_API_V1; kept so existing imports of the old name keep working.
+LLAMA_STACK_API_VERSION = LLAMA_STACK_API_V1
diff --git a/llama_stack/core/client.py b/llama_stack/core/client.py
index 03e4fb051..49e01794e 100644
--- a/llama_stack/core/client.py
+++ b/llama_stack/core/client.py
@@ -15,7 +15,6 @@ import httpx
 from pydantic import BaseModel, parse_obj_as
 from termcolor import cprint
 
-from llama_stack.apis.version import LLAMA_STACK_API_VERSION
 from llama_stack.providers.datatypes import RemoteProviderConfig
 
 _CLIENT_CLASSES = {}
@@ -114,7 +113,22 @@ def create_api_client_class(protocol) -> type:
                     break
                 kwargs[param.name] = args[i]
 
-            url = f"{self.base_url}/{LLAMA_STACK_API_VERSION}/{webmethod.route.lstrip('/')}"
+            # A method can carry several @webmethod decorators; collect all of them.
+            # NOTE(review): `method` is not bound by any line visible in this hunk; confirm it is in scope.
+            webmethods = getattr(method, "__webmethods__", [])
+            if not webmethods:
+                raise RuntimeError(f"Method {method} has no webmethod decorators")
+
+            # Use the first webmethod that is not deprecated; when every one of them
+            # is deprecated, fall back to the first entry in the list.
+            preferred_webmethod = next(
+                (wm for wm in webmethods if not getattr(wm, "deprecated", False)),
+                webmethods[0],
+            )
+
+            # NOTE(review): WebMethod.level defaults to None, which would be rendered here
+            # as the literal path segment "None"; confirm every webmethod sets a level.
+            url = f"{self.base_url}/{preferred_webmethod.level}/{preferred_webmethod.route.lstrip('/')}"
 
             def convert(value):
                 if isinstance(value, list):
diff --git a/llama_stack/core/server/routes.py b/llama_stack/core/server/routes.py
index 7baf20da5..4970d0bf8 100644
--- a/llama_stack/core/server/routes.py
+++ b/llama_stack/core/server/routes.py
@@ -14,7 +14,6 @@ from starlette.routing import Route
 
 from llama_stack.apis.datatypes import Api, ExternalApiSpec
 from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup
-from llama_stack.apis.version import LLAMA_STACK_API_VERSION
 from llama_stack.core.resolver import api_protocol_map
 from llama_stack.schema_utils import WebMethod
 
@@ -54,22 +53,22 @@ def get_all_api_routes(
                     protocol_methods.append((f"{tool_group.value}.{name}", method))
 
         for name, method in protocol_methods:
-            if not hasattr(method, "__webmethod__"):
+            # Every @webmethod decorator on the method is recorded in this list;
+            # methods without any decorator are skipped.
+            webmethods = getattr(method, "__webmethods__", [])
+            if not webmethods:
                 continue
 
-            # The __webmethod__ attribute is dynamically added by the @webmethod decorator
-            # mypy doesn't know about this dynamic attribute, so we ignore the attr-defined error
-            webmethod = method.__webmethod__  # type: ignore[attr-defined]
-            path = f"/{LLAMA_STACK_API_VERSION}/{webmethod.route.lstrip('/')}"
-            if webmethod.method == hdrs.METH_GET:
-                http_method = hdrs.METH_GET
-            elif webmethod.method == hdrs.METH_DELETE:
-                http_method = hdrs.METH_DELETE
-            else:
-                http_method = hdrs.METH_POST
-            routes.append(
-                (Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod)
-            )  # setting endpoint to None since don't use a Router object
+            # Register one route per webmethod decorator.
+            for webmethod in webmethods:
+                path = f"/{webmethod.level}/{webmethod.route.lstrip('/')}"
+                # GET and DELETE are kept as-is; any other method is registered as POST.
+                if webmethod.method in (hdrs.METH_GET, hdrs.METH_DELETE):
+                    http_method = webmethod.method
+                else:
+                    http_method = hdrs.METH_POST
+                # endpoint is None since no Router object is used
+                routes.append((Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod))
 
         apis[api] = routes
 
diff --git a/llama_stack/core/server/tracing.py b/llama_stack/core/server/tracing.py
index c48fc4d33..4c6df5b42 100644
--- a/llama_stack/core/server/tracing.py
+++ b/llama_stack/core/server/tracing.py
@@ -45,6 +45,15 @@ class TracingMiddleware:
             logger.debug(f"No matching route found for path: {path}, falling back to FastAPI")
             return await self.app(scope, receive, send)
 
+        # Surface every hit on a route whose webmethod is flagged as deprecated
+        if getattr(webmethod, "deprecated", False):
+            http_verb = scope.get("method", "GET")
+            logger.warning(
+                f"DEPRECATED ROUTE USED: {http_verb} {path} - "
+                "This route is deprecated and may be removed in a future version. "
+                "Please check the docs for the supported version."
+            )
+
         trace_attributes = {"__location__": "server", "raw_path": path}
 
         # Extract W3C trace context headers and store as trace attributes
diff --git a/llama_stack/schema_utils.py b/llama_stack/schema_utils.py
index 93382a881..4f8b4edff 100644
--- a/llama_stack/schema_utils.py
+++ b/llama_stack/schema_utils.py
@@ -13,6 +13,7 @@ from .strong_typing.schema import json_schema_type, register_schema  # noqa: F40
 
 @dataclass
 class WebMethod:
+    level: str | None = None
     route: str | None = None
     public: bool = False
     request_examples: list[Any] | None = None
@@ -23,6 +24,7 @@ class WebMethod:
     descriptive_name: str | None = None
     experimental: bool | None = False
     required_scope: str | None = None
+    deprecated: bool | None = False
 
 
 T = TypeVar("T", bound=Callable[..., Any])
@@ -31,6 +33,7 @@ T = TypeVar("T", bound=Callable[..., Any])
 def webmethod(
     route: str | None = None,
     method: str | None = None,
+    level: str | None = None,
     public: bool | None = False,
     request_examples: list[Any] | None = None,
     response_examples: list[Any] | None = None,
@@ -38,6 +41,7 @@ def webmethod(
     descriptive_name: str | None = None,
     experimental: bool | None = False,
     required_scope: str | None = None,
+    deprecated: bool | None = False,
 ) -> Callable[[T], T]:
     """
     Decorator that supplies additional metadata to an endpoint operation function.
@@ -51,9 +55,10 @@ def webmethod(
     """
 
     def wrap(func: T) -> T:
-        func.__webmethod__ = WebMethod(  # type: ignore
+        webmethod_obj = WebMethod(
             route=route,
             method=method,
+            level=level,
             public=public or False,
             request_examples=request_examples,
             response_examples=response_examples,
@@ -61,7 +66,16 @@ def webmethod(
             descriptive_name=descriptive_name,
             experimental=experimental,
             required_scope=required_scope,
+            deprecated=deprecated,
         )
+
+        # Every @webmethod applied to func appends its metadata here, so stacked decorators accumulate
+        if not hasattr(func, "__webmethods__"):
+            func.__webmethods__ = []  # type: ignore
+        func.__webmethods__.append(webmethod_obj)  # type: ignore
+
+        # __webmethod__ is overwritten on each application (latest one wins); kept for backwards compatibility
+        func.__webmethod__ = webmethod_obj  # type: ignore
         return func
 
     return wrap
diff --git a/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py b/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py
index de1427bfd..e97a9d8fb 100644
--- a/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py
+++ b/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py
@@ -6,6 +6,7 @@
 
 from typing import Protocol
 
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.datatypes import Api, ProviderSpec, RemoteProviderSpec
 from llama_stack.schema_utils import webmethod
 
@@ -28,7 +29,7 @@ class WeatherProvider(Protocol):
     A protocol for the Weather API.
     """
 
-    @webmethod(route="/weather/locations", method="GET")
+    @webmethod(route="/weather/locations", method="GET", level=LLAMA_STACK_API_V1)
     async def get_available_locations() -> dict[str, list[str]]:
         """
         Get the available locations.