From c88c4ff2c63536350ca9f714bf804f7fc69995c0 Mon Sep 17 00:00:00 2001
From: Charlie Doern <cdoern@redhat.com>
Date: Fri, 26 Sep 2025 10:18:07 -0400
Subject: [PATCH] feat: introduce API leveling, post_training, eval to v1alpha
 (#3449)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?

Rather than have a single `LLAMA_STACK_API_VERSION`, we need separate
`LLAMA_STACK_API_V1`, `LLAMA_STACK_API_V1ALPHA`, and `LLAMA_STACK_API_V1BETA` constants.

This also necessitated the addition of `level` to the `WebMethod` so that
routing can be handled properly.


For backwards compat, the `v1` routes are being kept around and marked
as `deprecated`. When used, the server will log a deprecation warning.

Deprecation log:

<img width="1224" height="134" alt="Screenshot 2025-09-25 at 2 43 36 PM"
src="https://github.com/user-attachments/assets/0cc7c245-dafc-48f0-be99-269fb9a686f9"
/>

move:
1. post_training to `v1alpha` as it is under heavy development and not
near its final state
2. eval: job scheduling is not implemented. It relies heavily on the
datasetio API, which is under development and missing implementations of
specific routes, indicating the structure of those routes might change.
Additionally, eval depends on the `inference` API, which is going to be
deprecated; eval will likely need a major API surface change to conform
to using completions properly

implements leveling in #3317

note: integration tests will fail until the SDK is regenerated with
v1alpha/inference as opposed to v1/inference

## Test Plan

existing tests should pass with newly generated schema. Conformance will
also pass as these routes are not the ones we currently test for
stability

Signed-off-by: Charlie Doern <cdoern@redhat.com>
---
 docs/openapi_generator/generate.py            |   6 +-
 docs/openapi_generator/pyopenapi/generator.py |   2 +-
 .../openapi_generator/pyopenapi/operations.py | 233 ++++---
 docs/static/llama-stack-spec.html             | 653 ++++++++++++++++++
 docs/static/llama-stack-spec.yaml             | 458 ++++++++++++
 llama_stack/apis/agents/agents.py             |  40 +-
 .../apis/batch_inference/batch_inference.py   |   5 +-
 llama_stack/apis/batches/batches.py           |   9 +-
 llama_stack/apis/benchmarks/benchmarks.py     |  13 +-
 llama_stack/apis/datasetio/datasetio.py       |   5 +-
 llama_stack/apis/datasets/datasets.py         |   9 +-
 llama_stack/apis/eval/eval.py                 |  32 +-
 llama_stack/apis/files/files.py               |  11 +-
 llama_stack/apis/inference/inference.py       |  23 +-
 llama_stack/apis/inspect/inspect.py           |   7 +-
 llama_stack/apis/models/models.py             |  11 +-
 .../apis/post_training/post_training.py       |  19 +-
 llama_stack/apis/prompts/prompts.py           |  15 +-
 llama_stack/apis/providers/providers.py       |   5 +-
 llama_stack/apis/safety/safety.py             |   5 +-
 llama_stack/apis/scoring/scoring.py           |   5 +-
 .../scoring_functions/scoring_functions.py    |   9 +-
 llama_stack/apis/shields/shields.py           |   9 +-
 .../synthetic_data_generation.py              |   3 +-
 llama_stack/apis/telemetry/telemetry.py       |  29 +-
 llama_stack/apis/tools/rag_tool.py            |   5 +-
 llama_stack/apis/tools/tools.py               |  17 +-
 llama_stack/apis/vector_dbs/vector_dbs.py     |   9 +-
 llama_stack/apis/vector_io/vector_io.py       |  39 +-
 llama_stack/apis/version.py                   |   4 +-
 llama_stack/core/client.py                    |  20 +-
 llama_stack/core/server/routes.py             |  30 +-
 llama_stack/core/server/tracing.py            |   8 +
 llama_stack/schema_utils.py                   |  16 +-
 .../src/llama_stack_api_weather/weather.py    |   3 +-
 35 files changed, 1507 insertions(+), 260 deletions(-)
diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index c27bc6440..54031d839 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -16,7 +16,7 @@ import sys
 import fire
 import ruamel.yaml as yaml
 
-from llama_stack.apis.version import LLAMA_STACK_API_VERSION  # noqa: E402
+from llama_stack.apis.version import LLAMA_STACK_API_V1 # noqa: E402
 from llama_stack.core.stack import LlamaStack  # noqa: E402
 
 from .pyopenapi.options import Options  # noqa: E402
@@ -25,7 +25,7 @@ from .pyopenapi.utility import Specification, validate_api  # noqa: E402
 
 
 def str_presenter(dumper, data):
-    if data.startswith(f"/{LLAMA_STACK_API_VERSION}") or data.startswith(
+    if data.startswith(f"/{LLAMA_STACK_API_V1}") or data.startswith(
         "#/components/schemas/"
     ):
         style = None
@@ -58,7 +58,7 @@ def main(output_dir: str):
             server=Server(url="http://any-hosted-llama-stack.com"),
             info=Info(
                 title="Llama Stack Specification",
-                version=LLAMA_STACK_API_VERSION,
+                version=LLAMA_STACK_API_V1,
                 description="""This is the specification of the Llama Stack that provides
                 a set of endpoints and their corresponding interfaces that are tailored to
                 best leverage Llama Models.""",
diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 5ac712f02..cdbf1c60c 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -829,7 +829,7 @@ class Generator:
             else:
                 raise NotImplementedError(f"unknown HTTP method: {op.http_method}")
 
-            route = op.get_route()
+            route = op.get_route(op.webmethod)
             route = route.replace(":path", "")
             print(f"route: {route}")
             if route in paths:
diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py
index 045e33848..ce33d3bb9 100644
--- a/docs/openapi_generator/pyopenapi/operations.py
+++ b/docs/openapi_generator/pyopenapi/operations.py
@@ -11,7 +11,7 @@ import typing
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union
 
-from llama_stack.apis.version import LLAMA_STACK_API_VERSION
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA
 
 from termcolor import colored
 
@@ -113,11 +113,13 @@ class EndpointOperation:
     request_examples: Optional[List[Any]] = None
     response_examples: Optional[List[Any]] = None
 
-    def get_route(self) -> str:
-        if self.route is not None:
-            return "/".join(["", LLAMA_STACK_API_VERSION, self.route.lstrip("/")])
+    def get_route(self, webmethod) -> str:
+        api_level = webmethod.level
 
-        route_parts = ["", LLAMA_STACK_API_VERSION, self.name]
+        if self.route is not None:
+            return "/".join(["", api_level, self.route.lstrip("/")])
+
+        route_parts = ["", api_level, self.name]
         for param_name, _ in self.path_params:
             route_parts.append("{" + param_name + "}")
         return "/".join(route_parts)
@@ -152,33 +154,39 @@ def _get_endpoint_functions(
 
     functions = inspect.getmembers(endpoint, inspect.isfunction)
     for func_name, func_ref in functions:
-        webmethod = getattr(func_ref, "__webmethod__", None)
-        if not webmethod:
+        webmethods = []
+
+        # Check for multiple webmethods (stacked decorators)
+        if hasattr(func_ref, "__webmethods__"):
+            webmethods = func_ref.__webmethods__
+
+        if not webmethods:
             continue
 
-        print(f"Processing {colored(func_name, 'white')}...")
-        operation_name = func_name
-        
-        if webmethod.method == "GET":
-            prefix = "get"
-        elif webmethod.method == "DELETE":
-            prefix = "delete"
-        elif webmethod.method == "POST":
-            prefix = "post"
-        elif operation_name.startswith("get_") or operation_name.endswith("/get"):
-            prefix = "get"
-        elif (
-            operation_name.startswith("delete_")
-            or operation_name.startswith("remove_")
-            or operation_name.endswith("/delete")
-            or operation_name.endswith("/remove")
-        ):
-            prefix = "delete"
-        else:
-            # by default everything else is a POST
-            prefix = "post"
+        for webmethod in webmethods:
+            print(f"Processing {colored(func_name, 'white')}...")
+            operation_name = func_name
+            
+            if webmethod.method == "GET":
+                prefix = "get"
+            elif webmethod.method == "DELETE":
+                prefix = "delete"
+            elif webmethod.method == "POST":
+                prefix = "post"
+            elif operation_name.startswith("get_") or operation_name.endswith("/get"):
+                prefix = "get"
+            elif (
+                operation_name.startswith("delete_")
+                or operation_name.startswith("remove_")
+                or operation_name.endswith("/delete")
+                or operation_name.endswith("/remove")
+            ):
+                prefix = "delete"
+            else:
+                # by default everything else is a POST
+                prefix = "post"
 
-        yield prefix, operation_name, func_name, func_ref
+            yield prefix, operation_name, func_name, func_ref
 
 
 def _get_defining_class(member_fn: str, derived_cls: type) -> type:
@@ -239,105 +247,101 @@ def get_endpoint_operations(
             "update",
         ],
     ):
-        # extract routing information from function metadata
-        webmethod = getattr(func_ref, "__webmethod__", None)
-        if webmethod is not None:
+        # Get all webmethods for this function
+        webmethods = getattr(func_ref, "__webmethods__", [])
+
+        # Create one EndpointOperation for each webmethod
+        for webmethod in webmethods:
             route = webmethod.route
             route_params = _get_route_parameters(route) if route is not None else None
             public = webmethod.public
             request_examples = webmethod.request_examples
             response_examples = webmethod.response_examples
-        else:
-            route = None
-            route_params = None
-            public = False
-            request_examples = None
-            response_examples = None
 
-        # inspect function signature for path and query parameters, and request/response payload type
-        signature = get_signature(func_ref)
+            # inspect function signature for path and query parameters, and request/response payload type
+            signature = get_signature(func_ref)
 
-        path_params = []
-        query_params = []
-        request_params = []
-        multipart_params = []
+            path_params = []
+            query_params = []
+            request_params = []
+            multipart_params = []
 
-        for param_name, parameter in signature.parameters.items():
-            param_type = _get_annotation_type(parameter.annotation, func_ref)
+            for param_name, parameter in signature.parameters.items():
+                param_type = _get_annotation_type(parameter.annotation, func_ref)
 
-            # omit "self" for instance methods
-            if param_name == "self" and param_type is inspect.Parameter.empty:
-                continue
+                # omit "self" for instance methods
+                if param_name == "self" and param_type is inspect.Parameter.empty:
+                    continue
 
-            # check if all parameters have explicit type
-            if parameter.annotation is inspect.Parameter.empty:
+                # check if all parameters have explicit type
+                if parameter.annotation is inspect.Parameter.empty:
+                    raise ValidationError(
+                        f"parameter '{param_name}' in function '{func_name}' has no type annotation"
+                    )
+
+                is_multipart = _is_multipart_param(param_type)
+
+                if prefix in ["get", "delete"]:
+                    if route_params is not None and param_name in route_params:
+                        path_params.append((param_name, param_type))
+                    else:
+                        query_params.append((param_name, param_type))
+                else:
+                    if route_params is not None and param_name in route_params:
+                        path_params.append((param_name, param_type))
+                    elif is_multipart:
+                        multipart_params.append((param_name, param_type))
+                    else:
+                        request_params.append((param_name, param_type))
+
+            # check if function has explicit return type
+            if signature.return_annotation is inspect.Signature.empty:
                 raise ValidationError(
-                    f"parameter '{param_name}' in function '{func_name}' has no type annotation"
+                    f"function '{func_name}' has no return type annotation"
                 )
 
-            is_multipart = _is_multipart_param(param_type)
-            
-            if prefix in ["get", "delete"]:
-                if route_params is not None and param_name in route_params:
-                    path_params.append((param_name, param_type))
-                else:
-                    query_params.append((param_name, param_type))
+            return_type = _get_annotation_type(signature.return_annotation, func_ref)
+
+            # operations that produce events are labeled as Generator[YieldType, SendType, ReturnType]
+            # where YieldType is the event type, SendType is None, and ReturnType is the immediate response type to the request
+            if typing.get_origin(return_type) is collections.abc.Generator:
+                event_type, send_type, response_type = typing.get_args(return_type)
+                if send_type is not type(None):
+                    raise ValidationError(
+                        f"function '{func_name}' has a return type Generator[Y,S,R] and therefore looks like an event but has an explicit send type"
+                    )
             else:
-                if route_params is not None and param_name in route_params:
-                    path_params.append((param_name, param_type))
-                elif is_multipart:
-                    multipart_params.append((param_name, param_type))
+                event_type = None
+
+                def process_type(t):
+                    if typing.get_origin(t) is collections.abc.AsyncIterator:
+                        # NOTE(ashwin): this is SSE and there is no way to represent it. either we make it a List
+                        # or the item type. I am choosing it to be the latter
+                        args = typing.get_args(t)
+                        return args[0]
+                    elif typing.get_origin(t) is typing.Union:
+                        types = [process_type(a) for a in typing.get_args(t)]
+                        return typing._UnionGenericAlias(typing.Union, tuple(types))
+                    else:
+                        return t
+
+                response_type = process_type(return_type)
+
+                if prefix in ["delete", "remove"]:
+                    http_method = HTTPMethod.DELETE
+                elif prefix == "post":
+                    http_method = HTTPMethod.POST
+                elif prefix == "get":
+                    http_method = HTTPMethod.GET
+                elif prefix == "set":
+                    http_method = HTTPMethod.PUT
+                elif prefix == "update":
+                    http_method = HTTPMethod.PATCH
                 else:
-                    request_params.append((param_name, param_type))
+                    raise ValidationError(f"unknown prefix {prefix}")
 
-        # check if function has explicit return type
-        if signature.return_annotation is inspect.Signature.empty:
-            raise ValidationError(
-                f"function '{func_name}' has no return type annotation"
-            )
-
-        return_type = _get_annotation_type(signature.return_annotation, func_ref)
-
-        # operations that produce events are labeled as Generator[YieldType, SendType, ReturnType]
-        # where YieldType is the event type, SendType is None, and ReturnType is the immediate response type to the request
-        if typing.get_origin(return_type) is collections.abc.Generator:
-            event_type, send_type, response_type = typing.get_args(return_type)
-            if send_type is not type(None):
-                raise ValidationError(
-                    f"function '{func_name}' has a return type Generator[Y,S,R] and therefore looks like an event but has an explicit send type"
-                )
-        else:
-            event_type = None
-
-            def process_type(t):
-                if typing.get_origin(t) is collections.abc.AsyncIterator:
-                    # NOTE(ashwin): this is SSE and there is no way to represent it. either we make it a List
-                    # or the item type. I am choosing it to be the latter
-                    args = typing.get_args(t)
-                    return args[0]
-                elif typing.get_origin(t) is typing.Union:
-                    types = [process_type(a) for a in typing.get_args(t)]
-                    return typing._UnionGenericAlias(typing.Union, tuple(types))
-                else:
-                    return t
-
-            response_type = process_type(return_type)
-
-            if prefix in ["delete", "remove"]:
-                http_method = HTTPMethod.DELETE
-            elif prefix == "post":
-                http_method = HTTPMethod.POST
-            elif prefix == "get":
-                http_method = HTTPMethod.GET
-            elif prefix == "set":
-                http_method = HTTPMethod.PUT
-            elif prefix == "update":
-                http_method = HTTPMethod.PATCH
-            else:
-                raise ValidationError(f"unknown prefix {prefix}")
-
-        result.append(
-            EndpointOperation(
+            # Create an EndpointOperation for this specific webmethod
+            operation = EndpointOperation(
                 defining_class=_get_defining_class(func_name, endpoint),
                 name=operation_name,
                 func_name=func_name,
@@ -354,7 +358,10 @@ def get_endpoint_operations(
                 request_examples=request_examples if use_examples else None,
                 response_examples=response_examples if use_examples else None,
             )
-        )
+
+            # Store the specific webmethod with this operation
+            operation.webmethod = webmethod
+            result.append(operation)
 
     if not result:
         raise ValidationError(f"no eligible endpoint operations in type {endpoint}")
diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html
index 36cb025c4..5a61221c1 100644
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@@ -175,6 +175,43 @@
                 }
             }
         },
+        "/v1alpha/post-training/job/cancel": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "PostTraining (Coming Soon)"
+                ],
+                "summary": "Cancel a training job.",
+                "description": "Cancel a training job.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/CancelTrainingJobRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/v1/post-training/job/cancel": {
             "post": {
                 "responses": {
@@ -1179,6 +1216,60 @@
                 }
             }
         },
+        "/v1alpha/eval/benchmarks/{benchmark_id}/evaluations": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "EvaluateResponse object containing generations and scores.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/EvaluateResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Eval"
+                ],
+                "summary": "Evaluate a list of rows on a benchmark.",
+                "description": "Evaluate a list of rows on a benchmark.",
+                "parameters": [
+                    {
+                        "name": "benchmark_id",
+                        "in": "path",
+                        "description": "The ID of the benchmark to run the evaluation on.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/EvaluateRowsRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/v1/eval/benchmarks/{benchmark_id}/evaluations": {
             "post": {
                 "responses": {
@@ -1366,6 +1457,85 @@
                 ]
             }
         },
+        "/v1alpha/eval/benchmarks/{benchmark_id}": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A Benchmark.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Benchmark"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Benchmarks"
+                ],
+                "summary": "Get a benchmark by its ID.",
+                "description": "Get a benchmark by its ID.",
+                "parameters": [
+                    {
+                        "name": "benchmark_id",
+                        "in": "path",
+                        "description": "The ID of the benchmark to get.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            },
+            "delete": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Benchmarks"
+                ],
+                "summary": "Unregister a benchmark.",
+                "description": "Unregister a benchmark.",
+                "parameters": [
+                    {
+                        "name": "benchmark_id",
+                        "in": "path",
+                        "description": "The ID of the benchmark to unregister.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
         "/v1/eval/benchmarks/{benchmark_id}": {
             "get": {
                 "responses": {
@@ -2079,6 +2249,50 @@
                 ]
             }
         },
+        "/v1alpha/post-training/job/artifacts": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A PostTrainingJobArtifactsResponse.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "PostTraining (Coming Soon)"
+                ],
+                "summary": "Get the artifacts of a training job.",
+                "description": "Get the artifacts of a training job.",
+                "parameters": [
+                    {
+                        "name": "job_uuid",
+                        "in": "query",
+                        "description": "The UUID of the job to get the artifacts of.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
         "/v1/post-training/job/artifacts": {
             "get": {
                 "responses": {
@@ -2123,6 +2337,50 @@
                 ]
             }
         },
+        "/v1alpha/post-training/job/status": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A PostTrainingJobStatusResponse.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/PostTrainingJobStatusResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "PostTraining (Coming Soon)"
+                ],
+                "summary": "Get the status of a training job.",
+                "description": "Get the status of a training job.",
+                "parameters": [
+                    {
+                        "name": "job_uuid",
+                        "in": "query",
+                        "description": "The UUID of the job to get the status of.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
         "/v1/post-training/job/status": {
             "get": {
                 "responses": {
@@ -2167,6 +2425,40 @@
                 ]
             }
         },
+        "/v1alpha/post-training/jobs": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A ListPostTrainingJobsResponse.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ListPostTrainingJobsResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "PostTraining (Coming Soon)"
+                ],
+                "summary": "Get all training jobs.",
+                "description": "Get all training jobs.",
+                "parameters": []
+            }
+        },
         "/v1/post-training/jobs": {
             "get": {
                 "responses": {
@@ -2538,6 +2830,103 @@
                 ]
             }
         },
+        "/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "The status of the evaluation job.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Job"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Eval"
+                ],
+                "summary": "Get the status of a job.",
+                "description": "Get the status of a job.",
+                "parameters": [
+                    {
+                        "name": "benchmark_id",
+                        "in": "path",
+                        "description": "The ID of the benchmark to run the evaluation on.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "job_id",
+                        "in": "path",
+                        "description": "The ID of the job to get the status of.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            },
+            "delete": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Eval"
+                ],
+                "summary": "Cancel a job.",
+                "description": "Cancel a job.",
+                "parameters": [
+                    {
+                        "name": "benchmark_id",
+                        "in": "path",
+                        "description": "The ID of the benchmark to run the evaluation on.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "job_id",
+                        "in": "path",
+                        "description": "The ID of the job to cancel.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
         "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
             "get": {
                 "responses": {
@@ -2635,6 +3024,59 @@
                 ]
             }
         },
+        "/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "The result of the job.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/EvaluateResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Eval"
+                ],
+                "summary": "Get the result of a job.",
+                "description": "Get the result of a job.",
+                "parameters": [
+                    {
+                        "name": "benchmark_id",
+                        "in": "path",
+                        "description": "The ID of the benchmark to run the evaluation on.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "job_id",
+                        "in": "path",
+                        "description": "The ID of the job to get the result of.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
         "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": {
             "get": {
                 "responses": {
@@ -2750,6 +3192,75 @@
                 ]
             }
         },
+        "/v1alpha/eval/benchmarks": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A ListBenchmarksResponse.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ListBenchmarksResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Benchmarks"
+                ],
+                "summary": "List all benchmarks.",
+                "description": "List all benchmarks.",
+                "parameters": []
+            },
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Benchmarks"
+                ],
+                "summary": "Register a benchmark.",
+                "description": "Register a benchmark.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/RegisterBenchmarkRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/v1/eval/benchmarks": {
             "get": {
                 "responses": {
@@ -4783,6 +5294,50 @@
                 }
             }
         },
+        "/v1alpha/post-training/preference-optimize": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "A PostTrainingJob.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/PostTrainingJob"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "PostTraining (Coming Soon)"
+                ],
+                "summary": "Run preference optimization of a model.",
+                "description": "Run preference optimization of a model.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/PreferenceOptimizeRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/v1/post-training/preference-optimize": {
             "post": {
                 "responses": {
@@ -5178,6 +5733,60 @@
                 }
             }
         },
+        "/v1alpha/eval/benchmarks/{benchmark_id}/jobs": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "The job that was created to run the evaluation.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Job"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Eval"
+                ],
+                "summary": "Run an evaluation on a benchmark.",
+                "description": "Run an evaluation on a benchmark.",
+                "parameters": [
+                    {
+                        "name": "benchmark_id",
+                        "in": "path",
+                        "description": "The ID of the benchmark to run the evaluation on.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/RunEvalRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/v1/eval/benchmarks/{benchmark_id}/jobs": {
             "post": {
                 "responses": {
@@ -5499,6 +6108,50 @@
                 }
             }
         },
+        "/v1alpha/post-training/supervised-fine-tune": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "A PostTrainingJob.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/PostTrainingJob"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "PostTraining (Coming Soon)"
+                ],
+                "summary": "Run supervised fine-tuning of a model.",
+                "description": "Run supervised fine-tuning of a model.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/SupervisedFineTuneRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/v1/post-training/supervised-fine-tune": {
             "post": {
                 "responses": {
diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index c4b82b630..9c0558658 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -109,6 +109,32 @@ paths:
             schema:
               $ref: '#/components/schemas/BatchCompletionRequest'
         required: true
+  /v1alpha/post-training/job/cancel:
+    post:
+      responses:
+        '200':
+          description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - PostTraining (Coming Soon)
+      summary: Cancel a training job.
+      description: Cancel a training job.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CancelTrainingJobRequest'
+        required: true
   /v1/post-training/job/cancel:
     post:
       responses:
@@ -832,6 +858,44 @@ paths:
             schema:
               $ref: '#/components/schemas/EmbeddingsRequest'
         required: true
+  /v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
+    post:
+      responses:
+        '200':
+          description: >-
+            EvaluateResponse object containing generations and scores.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/EvaluateResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Eval
+      summary: Evaluate a list of rows on a benchmark.
+      description: Evaluate a list of rows on a benchmark.
+      parameters:
+        - name: benchmark_id
+          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
+          required: true
+          schema:
+            type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/EvaluateRowsRequest'
+        required: true
   /v1/eval/benchmarks/{benchmark_id}/evaluations:
     post:
       responses:
@@ -962,6 +1026,61 @@ paths:
           required: true
           schema:
             type: string
+  /v1alpha/eval/benchmarks/{benchmark_id}:
+    get:
+      responses:
+        '200':
+          description: A Benchmark.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Benchmark'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Benchmarks
+      summary: Get a benchmark by its ID.
+      description: Get a benchmark by its ID.
+      parameters:
+        - name: benchmark_id
+          in: path
+          description: The ID of the benchmark to get.
+          required: true
+          schema:
+            type: string
+    delete:
+      responses:
+        '200':
+          description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Benchmarks
+      summary: Unregister a benchmark.
+      description: Unregister a benchmark.
+      parameters:
+        - name: benchmark_id
+          in: path
+          description: The ID of the benchmark to unregister.
+          required: true
+          schema:
+            type: string
   /v1/eval/benchmarks/{benchmark_id}:
     get:
       responses:
@@ -1458,6 +1577,37 @@ paths:
           required: true
           schema:
             type: string
+  /v1alpha/post-training/job/artifacts:
+    get:
+      responses:
+        '200':
+          description: A PostTrainingJobArtifactsResponse.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - PostTraining (Coming Soon)
+      summary: Get the artifacts of a training job.
+      description: Get the artifacts of a training job.
+      parameters:
+        - name: job_uuid
+          in: query
+          description: >-
+            The UUID of the job to get the artifacts of.
+          required: true
+          schema:
+            type: string
   /v1/post-training/job/artifacts:
     get:
       responses:
@@ -1489,6 +1639,37 @@ paths:
           required: true
           schema:
             type: string
+  /v1alpha/post-training/job/status:
+    get:
+      responses:
+        '200':
+          description: A PostTrainingJobStatusResponse.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/PostTrainingJobStatusResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - PostTraining (Coming Soon)
+      summary: Get the status of a training job.
+      description: Get the status of a training job.
+      parameters:
+        - name: job_uuid
+          in: query
+          description: >-
+            The UUID of the job to get the status of.
+          required: true
+          schema:
+            type: string
   /v1/post-training/job/status:
     get:
       responses:
@@ -1520,6 +1701,30 @@ paths:
           required: true
           schema:
             type: string
+  /v1alpha/post-training/jobs:
+    get:
+      responses:
+        '200':
+          description: A ListPostTrainingJobsResponse.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListPostTrainingJobsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - PostTraining (Coming Soon)
+      summary: Get all training jobs.
+      description: Get all training jobs.
+      parameters: []
   /v1/post-training/jobs:
     get:
       responses:
@@ -1804,6 +2009,75 @@ paths:
           required: false
           schema:
             type: integer
+  /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
+    get:
+      responses:
+        '200':
+          description: The status of the evaluation job.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Job'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Eval
+      summary: Get the status of a job.
+      description: Get the status of a job.
+      parameters:
+        - name: benchmark_id
+          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
+          required: true
+          schema:
+            type: string
+        - name: job_id
+          in: path
+          description: The ID of the job to get the status of.
+          required: true
+          schema:
+            type: string
+    delete:
+      responses:
+        '200':
+          description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Eval
+      summary: Cancel a job.
+      description: Cancel a job.
+      parameters:
+        - name: benchmark_id
+          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
+          required: true
+          schema:
+            type: string
+        - name: job_id
+          in: path
+          description: The ID of the job to cancel.
+          required: true
+          schema:
+            type: string
   /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
     get:
       responses:
@@ -1873,6 +2147,43 @@ paths:
           required: true
           schema:
             type: string
+  /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result:
+    get:
+      responses:
+        '200':
+          description: The result of the job.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/EvaluateResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Eval
+      summary: Get the result of a job.
+      description: Get the result of a job.
+      parameters:
+        - name: benchmark_id
+          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
+          required: true
+          schema:
+            type: string
+        - name: job_id
+          in: path
+          description: The ID of the job to get the result of.
+          required: true
+          schema:
+            type: string
   /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result:
     get:
       responses:
@@ -1953,6 +2264,55 @@ paths:
           required: false
           schema:
             type: integer
+  /v1alpha/eval/benchmarks:
+    get:
+      responses:
+        '200':
+          description: A ListBenchmarksResponse.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListBenchmarksResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Benchmarks
+      summary: List all benchmarks.
+      description: List all benchmarks.
+      parameters: []
+    post:
+      responses:
+        '200':
+          description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Benchmarks
+      summary: Register a benchmark.
+      description: Register a benchmark.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/RegisterBenchmarkRequest'
+        required: true
   /v1/eval/benchmarks:
     get:
       responses:
@@ -3454,6 +3814,36 @@ paths:
             schema:
               $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest'
         required: true
+  /v1alpha/post-training/preference-optimize:
+    post:
+      responses:
+        '200':
+          description: A PostTrainingJob.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/PostTrainingJob'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - PostTraining (Coming Soon)
+      summary: Run preference optimization of a model.
+      description: Run preference optimization of a model.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/PreferenceOptimizeRequest'
+        required: true
   /v1/post-training/preference-optimize:
     post:
       responses:
@@ -3735,6 +4125,44 @@ paths:
             schema:
               $ref: '#/components/schemas/ResumeAgentTurnRequest'
         required: true
+  /v1alpha/eval/benchmarks/{benchmark_id}/jobs:
+    post:
+      responses:
+        '200':
+          description: >-
+            The job that was created to run the evaluation.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Job'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Eval
+      summary: Run an evaluation on a benchmark.
+      description: Run an evaluation on a benchmark.
+      parameters:
+        - name: benchmark_id
+          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
+          required: true
+          schema:
+            type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/RunEvalRequest'
+        required: true
   /v1/eval/benchmarks/{benchmark_id}/jobs:
     post:
       responses:
@@ -3961,6 +4389,36 @@ paths:
             schema:
               $ref: '#/components/schemas/SetDefaultVersionRequest'
         required: true
+  /v1alpha/post-training/supervised-fine-tune:
+    post:
+      responses:
+        '200':
+          description: A PostTrainingJob.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/PostTrainingJob'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - PostTraining (Coming Soon)
+      summary: Run supervised fine-tuning of a model.
+      description: Run supervised fine-tuning of a model.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/SupervisedFineTuneRequest'
+        required: true
   /v1/post-training/supervised-fine-tune:
     post:
       responses:
diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py
index 7dd3e9289..e53ca82e2 100644
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -27,6 +27,7 @@ from llama_stack.apis.inference import (
 )
 from llama_stack.apis.safety import SafetyViolation
 from llama_stack.apis.tools import ToolDef
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 from .openai_responses import (
@@ -481,7 +482,7 @@ class Agents(Protocol):
     - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
     """
 
-    @webmethod(route="/agents", method="POST", descriptive_name="create_agent")
+    @webmethod(route="/agents", method="POST", descriptive_name="create_agent", level=LLAMA_STACK_API_V1)
     async def create_agent(
         self,
         agent_config: AgentConfig,
@@ -494,7 +495,10 @@ class Agents(Protocol):
         ...
 
     @webmethod(
-        route="/agents/{agent_id}/session/{session_id}/turn", method="POST", descriptive_name="create_agent_turn"
+        route="/agents/{agent_id}/session/{session_id}/turn",
+        method="POST",
+        descriptive_name="create_agent_turn",
+        level=LLAMA_STACK_API_V1,
     )
     async def create_agent_turn(
         self,
@@ -524,6 +528,7 @@ class Agents(Protocol):
         route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
         method="POST",
         descriptive_name="resume_agent_turn",
+        level=LLAMA_STACK_API_V1,
     )
     async def resume_agent_turn(
         self,
@@ -549,6 +554,7 @@ class Agents(Protocol):
     @webmethod(
         route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
         method="GET",
+        level=LLAMA_STACK_API_V1,
     )
     async def get_agents_turn(
         self,
@@ -568,6 +574,7 @@ class Agents(Protocol):
     @webmethod(
         route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
         method="GET",
+        level=LLAMA_STACK_API_V1,
     )
     async def get_agents_step(
         self,
@@ -586,7 +593,12 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/agents/{agent_id}/session", method="POST", descriptive_name="create_agent_session")
+    @webmethod(
+        route="/agents/{agent_id}/session",
+        method="POST",
+        descriptive_name="create_agent_session",
+        level=LLAMA_STACK_API_V1,
+    )
     async def create_agent_session(
         self,
         agent_id: str,
@@ -600,7 +612,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET")
+    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_agents_session(
         self,
         session_id: str,
@@ -616,7 +628,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE")
+    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_agents_session(
         self,
         session_id: str,
@@ -629,7 +641,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/agents/{agent_id}", method="DELETE")
+    @webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_agent(
         self,
         agent_id: str,
@@ -640,7 +652,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/agents", method="GET")
+    @webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1)
     async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
         """List all agents.
 
@@ -650,7 +662,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/agents/{agent_id}", method="GET")
+    @webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_agent(self, agent_id: str) -> Agent:
         """Describe an agent by its ID.
 
@@ -659,7 +671,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/agents/{agent_id}/sessions", method="GET")
+    @webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_agent_sessions(
         self,
         agent_id: str,
@@ -682,7 +694,7 @@ class Agents(Protocol):
     #
     # Both of these APIs are inherently stateful.
 
-    @webmethod(route="/openai/v1/responses/{response_id}", method="GET")
+    @webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_openai_response(
         self,
         response_id: str,
@@ -694,7 +706,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/responses", method="POST")
+    @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1)
     async def create_openai_response(
         self,
         input: str | list[OpenAIResponseInput],
@@ -719,7 +731,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/responses", method="GET")
+    @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1)
     async def list_openai_responses(
         self,
         after: str | None = None,
@@ -737,7 +749,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET")
+    @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
     async def list_openai_response_input_items(
         self,
         response_id: str,
@@ -759,7 +771,7 @@ class Agents(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE")
+    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
         """Delete an OpenAI response by its ID.
 
diff --git a/llama_stack/apis/batch_inference/batch_inference.py b/llama_stack/apis/batch_inference/batch_inference.py
index b2aa637e2..43ade0221 100644
--- a/llama_stack/apis/batch_inference/batch_inference.py
+++ b/llama_stack/apis/batch_inference/batch_inference.py
@@ -17,6 +17,7 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import webmethod
 
 
@@ -30,7 +31,7 @@ class BatchInference(Protocol):
     including (post-training, evals, etc).
     """
 
-    @webmethod(route="/batch-inference/completion", method="POST")
+    @webmethod(route="/batch-inference/completion", method="POST", level=LLAMA_STACK_API_V1)
     async def completion(
         self,
         model: str,
@@ -50,7 +51,7 @@ class BatchInference(Protocol):
         """
         ...
 
-    @webmethod(route="/batch-inference/chat-completion", method="POST")
+    @webmethod(route="/batch-inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
     async def chat_completion(
         self,
         model: str,
diff --git a/llama_stack/apis/batches/batches.py b/llama_stack/apis/batches/batches.py
index c6bbd92eb..5890cbe04 100644
--- a/llama_stack/apis/batches/batches.py
+++ b/llama_stack/apis/batches/batches.py
@@ -8,6 +8,7 @@ from typing import Literal, Protocol, runtime_checkable
 
 from pydantic import BaseModel, Field
 
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, webmethod
 
 try:
@@ -42,7 +43,7 @@ class Batches(Protocol):
     Note: This API is currently under active development and may undergo changes.
     """
 
-    @webmethod(route="/openai/v1/batches", method="POST")
+    @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1)
     async def create_batch(
         self,
         input_file_id: str,
@@ -62,7 +63,7 @@ class Batches(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET")
+    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def retrieve_batch(self, batch_id: str) -> BatchObject:
         """Retrieve information about a specific batch.
 
@@ -71,7 +72,7 @@ class Batches(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST")
+    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
     async def cancel_batch(self, batch_id: str) -> BatchObject:
         """Cancel a batch that is in progress.
 
@@ -80,7 +81,7 @@ class Batches(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/batches", method="GET")
+    @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1)
     async def list_batches(
         self,
         after: str | None = None,
diff --git a/llama_stack/apis/benchmarks/benchmarks.py b/llama_stack/apis/benchmarks/benchmarks.py
index 8d0a25e7b..d87d45a60 100644
--- a/llama_stack/apis/benchmarks/benchmarks.py
+++ b/llama_stack/apis/benchmarks/benchmarks.py
@@ -8,6 +8,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, Field
 
 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack.schema_utils import json_schema_type, webmethod
 
 
@@ -53,7 +54,8 @@ class ListBenchmarksResponse(BaseModel):
 
 @runtime_checkable
 class Benchmarks(Protocol):
-    @webmethod(route="/eval/benchmarks", method="GET")
+    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def list_benchmarks(self) -> ListBenchmarksResponse:
         """List all benchmarks.
 
@@ -61,7 +63,8 @@ class Benchmarks(Protocol):
         """
         ...
 
-    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET")
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_benchmark(
         self,
         benchmark_id: str,
@@ -73,7 +76,8 @@ class Benchmarks(Protocol):
         """
         ...
 
-    @webmethod(route="/eval/benchmarks", method="POST")
+    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def register_benchmark(
         self,
         benchmark_id: str,
@@ -94,7 +98,8 @@ class Benchmarks(Protocol):
         """
         ...
 
-    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE")
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
     async def unregister_benchmark(self, benchmark_id: str) -> None:
         """Unregister a benchmark.
 
diff --git a/llama_stack/apis/datasetio/datasetio.py b/llama_stack/apis/datasetio/datasetio.py
index 1183983cc..27e5336bc 100644
--- a/llama_stack/apis/datasetio/datasetio.py
+++ b/llama_stack/apis/datasetio/datasetio.py
@@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable
 
 from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.apis.datasets import Dataset
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import webmethod
 
 
@@ -20,7 +21,7 @@ class DatasetIO(Protocol):
     # keeping for aligning with inference/safety, but this is not used
     dataset_store: DatasetStore
 
-    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET")
+    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def iterrows(
         self,
         dataset_id: str,
@@ -44,7 +45,7 @@ class DatasetIO(Protocol):
         """
         ...
 
-    @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST")
+    @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1)
     async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
         """Append rows to a dataset.
 
diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py
index f347e0e29..be0cbf09a 100644
--- a/llama_stack/apis/datasets/datasets.py
+++ b/llama_stack/apis/datasets/datasets.py
@@ -10,6 +10,7 @@ from typing import Annotated, Any, Literal, Protocol
 from pydantic import BaseModel, Field
 
 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 
@@ -145,7 +146,7 @@ class ListDatasetsResponse(BaseModel):
 
 
 class Datasets(Protocol):
-    @webmethod(route="/datasets", method="POST")
+    @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1)
     async def register_dataset(
         self,
         purpose: DatasetPurpose,
@@ -214,7 +215,7 @@ class Datasets(Protocol):
         """
         ...
 
-    @webmethod(route="/datasets/{dataset_id:path}", method="GET")
+    @webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_dataset(
         self,
         dataset_id: str,
@@ -226,7 +227,7 @@ class Datasets(Protocol):
         """
         ...
 
-    @webmethod(route="/datasets", method="GET")
+    @webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1)
     async def list_datasets(self) -> ListDatasetsResponse:
         """List all datasets.
 
@@ -234,7 +235,7 @@ class Datasets(Protocol):
         """
         ...
 
-    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE")
+    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def unregister_dataset(
         self,
         dataset_id: str,
diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py
index 83a0a8e56..bb81778f1 100644
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@@ -13,6 +13,7 @@ from llama_stack.apis.common.job_types import Job
 from llama_stack.apis.inference import SamplingParams, SystemMessage
 from llama_stack.apis.scoring import ScoringResult
 from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 
@@ -83,7 +84,8 @@ class EvaluateResponse(BaseModel):
 class Eval(Protocol):
     """Llama Stack Evaluation API for running evaluations on model and agent candidates."""
 
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def run_eval(
         self,
         benchmark_id: str,
@@ -97,7 +99,10 @@ class Eval(Protocol):
         """
         ...
 
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
+    )
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def evaluate_rows(
         self,
         benchmark_id: str,
@@ -115,7 +120,10 @@ class Eval(Protocol):
         """
         ...
 
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
+    )
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def job_status(self, benchmark_id: str, job_id: str) -> Job:
         """Get the status of a job.
 
@@ -125,7 +133,13 @@ class Eval(Protocol):
         """
         ...
 
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+        method="DELETE",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
     async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
         """Cancel a job.
 
@@ -134,7 +148,15 @@ class Eval(Protocol):
         """
         ...
 
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA
+    )
     async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
         """Get the result of a job.
 
diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py
index 124e4bc8e..7e45b55ee 100644
--- a/llama_stack/apis/files/files.py
+++ b/llama_stack/apis/files/files.py
@@ -11,6 +11,7 @@ from fastapi import File, Form, Response, UploadFile
 from pydantic import BaseModel, Field
 
 from llama_stack.apis.common.responses import Order
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -104,7 +105,7 @@ class OpenAIFileDeleteResponse(BaseModel):
 @trace_protocol
 class Files(Protocol):
     # OpenAI Files API Endpoints
-    @webmethod(route="/openai/v1/files", method="POST")
+    @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_upload_file(
         self,
         file: Annotated[UploadFile, File()],
@@ -127,7 +128,7 @@ class Files(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/files", method="GET")
+    @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_files(
         self,
         after: str | None = None,
@@ -146,7 +147,7 @@ class Files(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/files/{file_id}", method="GET")
+    @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_file(
         self,
         file_id: str,
@@ -159,7 +160,7 @@ class Files(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE")
+    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def openai_delete_file(
         self,
         file_id: str,
@@ -172,7 +173,7 @@ class Files(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET")
+    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_file_content(
         self,
         file_id: str,
diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py
index bd4737ca7..4f5332b5f 100644
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -21,6 +21,7 @@ from llama_stack.apis.common.content_types import ContentDelta, InterleavedConte
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.models import Model
 from llama_stack.apis.telemetry import MetricResponseMixin
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.models.llama.datatypes import (
     BuiltinTool,
     StopReason,
@@ -1026,7 +1027,7 @@ class InferenceProvider(Protocol):
 
     model_store: ModelStore | None = None
 
-    @webmethod(route="/inference/completion", method="POST")
+    @webmethod(route="/inference/completion", method="POST", level=LLAMA_STACK_API_V1)
     async def completion(
         self,
         model_id: str,
@@ -1049,7 +1050,7 @@ class InferenceProvider(Protocol):
         """
         ...
 
-    @webmethod(route="/inference/batch-completion", method="POST", experimental=True)
+    @webmethod(route="/inference/batch-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
     async def batch_completion(
         self,
         model_id: str,
@@ -1070,7 +1071,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Batch completion is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete
 
-    @webmethod(route="/inference/chat-completion", method="POST")
+    @webmethod(route="/inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
     async def chat_completion(
         self,
         model_id: str,
@@ -1110,7 +1111,7 @@ class InferenceProvider(Protocol):
         """
         ...
 
-    @webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True)
+    @webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
     async def batch_chat_completion(
         self,
         model_id: str,
@@ -1135,7 +1136,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Batch chat completion is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete
 
-    @webmethod(route="/inference/embeddings", method="POST")
+    @webmethod(route="/inference/embeddings", method="POST", level=LLAMA_STACK_API_V1)
     async def embeddings(
         self,
         model_id: str,
@@ -1155,7 +1156,7 @@ class InferenceProvider(Protocol):
         """
         ...
 
-    @webmethod(route="/inference/rerank", method="POST", experimental=True)
+    @webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
     async def rerank(
         self,
         model: str,
@@ -1174,7 +1175,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Reranking is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete
 
-    @webmethod(route="/openai/v1/completions", method="POST")
+    @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_completion(
         self,
         # Standard OpenAI completion parameters
@@ -1225,7 +1226,7 @@ class InferenceProvider(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/chat/completions", method="POST")
+    @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_chat_completion(
         self,
         model: str,
@@ -1281,7 +1282,7 @@ class InferenceProvider(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/embeddings", method="POST")
+    @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_embeddings(
         self,
         model: str,
@@ -1310,7 +1311,7 @@ class Inference(InferenceProvider):
     - Embedding models: these models generate embeddings to be used for semantic search.
     """
 
-    @webmethod(route="/openai/v1/chat/completions", method="GET")
+    @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_chat_completions(
         self,
         after: str | None = None,
@@ -1328,7 +1329,7 @@ class Inference(InferenceProvider):
         """
         raise NotImplementedError("List chat completions is not implemented")
 
-    @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET")
+    @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
         """Describe a chat completion by its ID.
 
diff --git a/llama_stack/apis/inspect/inspect.py b/llama_stack/apis/inspect/inspect.py
index 91d9c3da7..e859dbe59 100644
--- a/llama_stack/apis/inspect/inspect.py
+++ b/llama_stack/apis/inspect/inspect.py
@@ -8,6 +8,7 @@ from typing import Protocol, runtime_checkable
 
 from pydantic import BaseModel
 
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.datatypes import HealthStatus
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -57,7 +58,7 @@ class ListRoutesResponse(BaseModel):
 
 @runtime_checkable
 class Inspect(Protocol):
-    @webmethod(route="/inspect/routes", method="GET")
+    @webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
     async def list_routes(self) -> ListRoutesResponse:
         """List all available API routes with their methods and implementing providers.
 
@@ -65,7 +66,7 @@ class Inspect(Protocol):
         """
         ...
 
-    @webmethod(route="/health", method="GET")
+    @webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1)
     async def health(self) -> HealthInfo:
         """Get the current health status of the service.
 
@@ -73,7 +74,7 @@ class Inspect(Protocol):
         """
         ...
 
-    @webmethod(route="/version", method="GET")
+    @webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1)
     async def version(self) -> VersionInfo:
         """Get the version of the service.
 
diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py
index 1af6fc9df..a4f6a888b 100644
--- a/llama_stack/apis/models/models.py
+++ b/llama_stack/apis/models/models.py
@@ -10,6 +10,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, ConfigDict, Field, field_validator
 
 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -102,7 +103,7 @@ class OpenAIListModelsResponse(BaseModel):
 @runtime_checkable
 @trace_protocol
 class Models(Protocol):
-    @webmethod(route="/models", method="GET")
+    @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
     async def list_models(self) -> ListModelsResponse:
         """List all models.
 
@@ -110,7 +111,7 @@ class Models(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/models", method="GET")
+    @webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_models(self) -> OpenAIListModelsResponse:
         """List models using the OpenAI API.
 
@@ -118,7 +119,7 @@ class Models(Protocol):
         """
         ...
 
-    @webmethod(route="/models/{model_id:path}", method="GET")
+    @webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_model(
         self,
         model_id: str,
@@ -130,7 +131,7 @@ class Models(Protocol):
         """
         ...
 
-    @webmethod(route="/models", method="POST")
+    @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1)
     async def register_model(
         self,
         model_id: str,
@@ -150,7 +151,7 @@ class Models(Protocol):
         """
         ...
 
-    @webmethod(route="/models/{model_id:path}", method="DELETE")
+    @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def unregister_model(
         self,
         model_id: str,
diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py
index c16221289..30a51f765 100644
--- a/llama_stack/apis/post_training/post_training.py
+++ b/llama_stack/apis/post_training/post_training.py
@@ -13,6 +13,7 @@ from pydantic import BaseModel, Field
 from llama_stack.apis.common.content_types import URL
 from llama_stack.apis.common.job_types import JobStatus
 from llama_stack.apis.common.training_types import Checkpoint
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 
@@ -283,7 +284,8 @@ class PostTrainingJobArtifactsResponse(BaseModel):
 
 
 class PostTraining(Protocol):
-    @webmethod(route="/post-training/supervised-fine-tune", method="POST")
+    @webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def supervised_fine_tune(
         self,
         job_uuid: str,
@@ -310,7 +312,8 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/preference-optimize", method="POST")
+    @webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def preference_optimize(
         self,
         job_uuid: str,
@@ -332,7 +335,8 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/jobs", method="GET")
+    @webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
         """Get all training jobs.
 
@@ -340,7 +344,8 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/job/status", method="GET")
+    @webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse:
         """Get the status of a training job.
 
@@ -349,7 +354,8 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/job/cancel", method="POST")
+    @webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def cancel_training_job(self, job_uuid: str) -> None:
         """Cancel a training job.
 
@@ -357,7 +363,8 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/job/artifacts", method="GET")
+    @webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
         """Get the artifacts of a training job.
 
diff --git a/llama_stack/apis/prompts/prompts.py b/llama_stack/apis/prompts/prompts.py
index e6a376c3f..c56185e25 100644
--- a/llama_stack/apis/prompts/prompts.py
+++ b/llama_stack/apis/prompts/prompts.py
@@ -10,6 +10,7 @@ from typing import Protocol, runtime_checkable
 
 from pydantic import BaseModel, Field, field_validator, model_validator
 
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -95,7 +96,7 @@ class ListPromptsResponse(BaseModel):
 class Prompts(Protocol):
     """Protocol for prompt management operations."""
 
-    @webmethod(route="/prompts", method="GET")
+    @webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1)
     async def list_prompts(self) -> ListPromptsResponse:
         """List all prompts.
 
@@ -103,7 +104,7 @@ class Prompts(Protocol):
         """
         ...
 
-    @webmethod(route="/prompts/{prompt_id}/versions", method="GET")
+    @webmethod(route="/prompts/{prompt_id}/versions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_prompt_versions(
         self,
         prompt_id: str,
@@ -115,7 +116,7 @@ class Prompts(Protocol):
         """
         ...
 
-    @webmethod(route="/prompts/{prompt_id}", method="GET")
+    @webmethod(route="/prompts/{prompt_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_prompt(
         self,
         prompt_id: str,
@@ -129,7 +130,7 @@ class Prompts(Protocol):
         """
         ...
 
-    @webmethod(route="/prompts", method="POST")
+    @webmethod(route="/prompts", method="POST", level=LLAMA_STACK_API_V1)
     async def create_prompt(
         self,
         prompt: str,
@@ -143,7 +144,7 @@ class Prompts(Protocol):
         """
         ...
 
-    @webmethod(route="/prompts/{prompt_id}", method="PUT")
+    @webmethod(route="/prompts/{prompt_id}", method="PUT", level=LLAMA_STACK_API_V1)
     async def update_prompt(
         self,
         prompt_id: str,
@@ -163,7 +164,7 @@ class Prompts(Protocol):
         """
         ...
 
-    @webmethod(route="/prompts/{prompt_id}", method="DELETE")
+    @webmethod(route="/prompts/{prompt_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_prompt(
         self,
         prompt_id: str,
@@ -174,7 +175,7 @@ class Prompts(Protocol):
         """
         ...
 
-    @webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT")
+    @webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT", level=LLAMA_STACK_API_V1)
     async def set_default_version(
         self,
         prompt_id: str,
diff --git a/llama_stack/apis/providers/providers.py b/llama_stack/apis/providers/providers.py
index 8a1e93d8f..d1cff0f6c 100644
--- a/llama_stack/apis/providers/providers.py
+++ b/llama_stack/apis/providers/providers.py
@@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable
 
 from pydantic import BaseModel
 
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.datatypes import HealthResponse
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -45,7 +46,7 @@ class Providers(Protocol):
     Providers API for inspecting, listing, and modifying providers and their configurations.
     """
 
-    @webmethod(route="/providers", method="GET")
+    @webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1)
     async def list_providers(self) -> ListProvidersResponse:
         """List all available providers.
 
@@ -53,7 +54,7 @@ class Providers(Protocol):
         """
         ...
 
-    @webmethod(route="/providers/{provider_id}", method="GET")
+    @webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def inspect_provider(self, provider_id: str) -> ProviderInfo:
         """Get detailed information about a specific provider.
 
diff --git a/llama_stack/apis/safety/safety.py b/llama_stack/apis/safety/safety.py
index 25ee03ec1..98367e9b0 100644
--- a/llama_stack/apis/safety/safety.py
+++ b/llama_stack/apis/safety/safety.py
@@ -11,6 +11,7 @@ from pydantic import BaseModel, Field
 
 from llama_stack.apis.inference import Message
 from llama_stack.apis.shields import Shield
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -97,7 +98,7 @@ class ShieldStore(Protocol):
 class Safety(Protocol):
     shield_store: ShieldStore
 
-    @webmethod(route="/safety/run-shield", method="POST")
+    @webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1)
     async def run_shield(
         self,
         shield_id: str,
@@ -113,7 +114,7 @@ class Safety(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/moderations", method="POST")
+    @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1)
     async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
         """Classifies if text and/or image inputs are potentially harmful.
         :param input: Input (or inputs) to classify.
diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py
index 8ca599b44..03d943e94 100644
--- a/llama_stack/apis/scoring/scoring.py
+++ b/llama_stack/apis/scoring/scoring.py
@@ -9,6 +9,7 @@ from typing import Any, Protocol, runtime_checkable
 from pydantic import BaseModel
 
 from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, webmethod
 
 # mapping of metric to value
@@ -61,7 +62,7 @@ class ScoringFunctionStore(Protocol):
 class Scoring(Protocol):
     scoring_function_store: ScoringFunctionStore
 
-    @webmethod(route="/scoring/score-batch", method="POST")
+    @webmethod(route="/scoring/score-batch", method="POST", level=LLAMA_STACK_API_V1)
     async def score_batch(
         self,
         dataset_id: str,
@@ -77,7 +78,7 @@ class Scoring(Protocol):
         """
         ...
 
-    @webmethod(route="/scoring/score", method="POST")
+    @webmethod(route="/scoring/score", method="POST", level=LLAMA_STACK_API_V1)
     async def score(
         self,
         input_rows: list[dict[str, Any]],
diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/llama_stack/apis/scoring_functions/scoring_functions.py
index 541067766..fe49723ab 100644
--- a/llama_stack/apis/scoring_functions/scoring_functions.py
+++ b/llama_stack/apis/scoring_functions/scoring_functions.py
@@ -18,6 +18,7 @@ from pydantic import BaseModel, Field
 
 from llama_stack.apis.common.type_system import ParamType
 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 
@@ -160,7 +161,7 @@ class ListScoringFunctionsResponse(BaseModel):
 
 @runtime_checkable
 class ScoringFunctions(Protocol):
-    @webmethod(route="/scoring-functions", method="GET")
+    @webmethod(route="/scoring-functions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_scoring_functions(self) -> ListScoringFunctionsResponse:
         """List all scoring functions.
 
@@ -168,7 +169,7 @@ class ScoringFunctions(Protocol):
         """
         ...
 
-    @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET")
+    @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn:
         """Get a scoring function by its ID.
 
@@ -177,7 +178,7 @@ class ScoringFunctions(Protocol):
         """
         ...
 
-    @webmethod(route="/scoring-functions", method="POST")
+    @webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1)
     async def register_scoring_function(
         self,
         scoring_fn_id: str,
@@ -198,7 +199,7 @@ class ScoringFunctions(Protocol):
         """
         ...
 
-    @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE")
+    @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
         """Unregister a scoring function.
 
diff --git a/llama_stack/apis/shields/shields.py b/llama_stack/apis/shields/shields.py
index ec1b85349..5d967cf02 100644
--- a/llama_stack/apis/shields/shields.py
+++ b/llama_stack/apis/shields/shields.py
@@ -9,6 +9,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel
 
 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -49,7 +50,7 @@ class ListShieldsResponse(BaseModel):
 @runtime_checkable
 @trace_protocol
 class Shields(Protocol):
-    @webmethod(route="/shields", method="GET")
+    @webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1)
     async def list_shields(self) -> ListShieldsResponse:
         """List all shields.
 
@@ -57,7 +58,7 @@ class Shields(Protocol):
         """
         ...
 
-    @webmethod(route="/shields/{identifier:path}", method="GET")
+    @webmethod(route="/shields/{identifier:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_shield(self, identifier: str) -> Shield:
         """Get a shield by its identifier.
 
@@ -66,7 +67,7 @@ class Shields(Protocol):
         """
         ...
 
-    @webmethod(route="/shields", method="POST")
+    @webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1)
     async def register_shield(
         self,
         shield_id: str,
@@ -84,7 +85,7 @@ class Shields(Protocol):
         """
         ...
 
-    @webmethod(route="/shields/{identifier:path}", method="DELETE")
+    @webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def unregister_shield(self, identifier: str) -> None:
         """Unregister a shield.
 
diff --git a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
index a7af44b28..c13e2c17c 100644
--- a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
+++ b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
@@ -10,6 +10,7 @@ from typing import Any, Protocol
 from pydantic import BaseModel
 
 from llama_stack.apis.inference import Message
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, webmethod
 
 
@@ -59,7 +60,7 @@ class SyntheticDataGenerationResponse(BaseModel):
 
 
 class SyntheticDataGeneration(Protocol):
-    @webmethod(route="/synthetic-data-generation/generate")
+    @webmethod(route="/synthetic-data-generation/generate", level=LLAMA_STACK_API_V1)
     def synthetic_data_generate(
         self,
         dialogs: list[Message],
diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py
index 8d1b5d697..29dd23989 100644
--- a/llama_stack/apis/telemetry/telemetry.py
+++ b/llama_stack/apis/telemetry/telemetry.py
@@ -16,6 +16,7 @@ from typing import (
 
 from pydantic import BaseModel, Field
 
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.models.llama.datatypes import Primitive
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
@@ -412,7 +413,7 @@ class QueryMetricsResponse(BaseModel):
 
 @runtime_checkable
 class Telemetry(Protocol):
-    @webmethod(route="/telemetry/events", method="POST")
+    @webmethod(route="/telemetry/events", method="POST", level=LLAMA_STACK_API_V1)
     async def log_event(
         self,
         event: Event,
@@ -425,7 +426,7 @@ class Telemetry(Protocol):
         """
         ...
 
-    @webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE)
+    @webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1)
     async def query_traces(
         self,
         attribute_filters: list[QueryCondition] | None = None,
@@ -443,7 +444,9 @@ class Telemetry(Protocol):
         """
         ...
 
-    @webmethod(route="/telemetry/traces/{trace_id:path}", method="GET", required_scope=REQUIRED_SCOPE)
+    @webmethod(
+        route="/telemetry/traces/{trace_id:path}", method="GET", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1
+    )
     async def get_trace(self, trace_id: str) -> Trace:
         """Get a trace by its ID.
 
@@ -453,7 +456,10 @@ class Telemetry(Protocol):
         ...
 
     @webmethod(
-        route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}", method="GET", required_scope=REQUIRED_SCOPE
+        route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
+        method="GET",
+        required_scope=REQUIRED_SCOPE,
+        level=LLAMA_STACK_API_V1,
     )
     async def get_span(self, trace_id: str, span_id: str) -> Span:
         """Get a span by its ID.
@@ -464,7 +470,12 @@ class Telemetry(Protocol):
         """
         ...
 
-    @webmethod(route="/telemetry/spans/{span_id:path}/tree", method="POST", required_scope=REQUIRED_SCOPE)
+    @webmethod(
+        route="/telemetry/spans/{span_id:path}/tree",
+        method="POST",
+        required_scope=REQUIRED_SCOPE,
+        level=LLAMA_STACK_API_V1,
+    )
     async def get_span_tree(
         self,
         span_id: str,
@@ -480,7 +491,7 @@ class Telemetry(Protocol):
         """
         ...
 
-    @webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE)
+    @webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1)
     async def query_spans(
         self,
         attribute_filters: list[QueryCondition],
@@ -496,7 +507,7 @@ class Telemetry(Protocol):
         """
         ...
 
-    @webmethod(route="/telemetry/spans/export", method="POST")
+    @webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1)
     async def save_spans_to_dataset(
         self,
         attribute_filters: list[QueryCondition],
@@ -513,7 +524,9 @@ class Telemetry(Protocol):
         """
         ...
 
-    @webmethod(route="/telemetry/metrics/{metric_name}", method="POST", required_scope=REQUIRED_SCOPE)
+    @webmethod(
+        route="/telemetry/metrics/{metric_name}", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1
+    )
     async def query_metrics(
         self,
         metric_name: str,
diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py
index 651016bd1..ed7847e23 100644
--- a/llama_stack/apis/tools/rag_tool.py
+++ b/llama_stack/apis/tools/rag_tool.py
@@ -11,6 +11,7 @@ from pydantic import BaseModel, Field, field_validator
 from typing_extensions import runtime_checkable
 
 from llama_stack.apis.common.content_types import URL, InterleavedContent
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
@@ -185,7 +186,7 @@ class RAGQueryConfig(BaseModel):
 @runtime_checkable
 @trace_protocol
 class RAGToolRuntime(Protocol):
-    @webmethod(route="/tool-runtime/rag-tool/insert", method="POST")
+    @webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1)
     async def insert(
         self,
         documents: list[RAGDocument],
@@ -200,7 +201,7 @@ class RAGToolRuntime(Protocol):
         """
         ...
 
-    @webmethod(route="/tool-runtime/rag-tool/query", method="POST")
+    @webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1)
     async def query(
         self,
         content: InterleavedContent,
diff --git a/llama_stack/apis/tools/tools.py b/llama_stack/apis/tools/tools.py
index 52b86375a..12bd9efa2 100644
--- a/llama_stack/apis/tools/tools.py
+++ b/llama_stack/apis/tools/tools.py
@@ -12,6 +12,7 @@ from typing_extensions import runtime_checkable
 
 from llama_stack.apis.common.content_types import URL, InterleavedContent
 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -151,7 +152,7 @@ class ListToolDefsResponse(BaseModel):
 @runtime_checkable
 @trace_protocol
 class ToolGroups(Protocol):
-    @webmethod(route="/toolgroups", method="POST")
+    @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1)
     async def register_tool_group(
         self,
         toolgroup_id: str,
@@ -168,7 +169,7 @@ class ToolGroups(Protocol):
         """
         ...
 
-    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET")
+    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_tool_group(
         self,
         toolgroup_id: str,
@@ -180,7 +181,7 @@ class ToolGroups(Protocol):
         """
         ...
 
-    @webmethod(route="/toolgroups", method="GET")
+    @webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1)
     async def list_tool_groups(self) -> ListToolGroupsResponse:
         """List tool groups with optional provider.
 
@@ -188,7 +189,7 @@ class ToolGroups(Protocol):
         """
         ...
 
-    @webmethod(route="/tools", method="GET")
+    @webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1)
     async def list_tools(self, toolgroup_id: str | None = None) -> ListToolsResponse:
         """List tools with optional tool group.
 
@@ -197,7 +198,7 @@ class ToolGroups(Protocol):
         """
         ...
 
-    @webmethod(route="/tools/{tool_name:path}", method="GET")
+    @webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_tool(
         self,
         tool_name: str,
@@ -209,7 +210,7 @@ class ToolGroups(Protocol):
         """
         ...
 
-    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE")
+    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def unregister_toolgroup(
         self,
         toolgroup_id: str,
@@ -238,7 +239,7 @@ class ToolRuntime(Protocol):
     rag_tool: RAGToolRuntime | None = None
 
     # TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed.
-    @webmethod(route="/tool-runtime/list-tools", method="GET")
+    @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1)
     async def list_runtime_tools(
         self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
     ) -> ListToolDefsResponse:
@@ -250,7 +251,7 @@ class ToolRuntime(Protocol):
         """
         ...
 
-    @webmethod(route="/tool-runtime/invoke", method="POST")
+    @webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1)
     async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
         """Run a tool with the given arguments.
 
diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_dbs/vector_dbs.py
index 47820fa0f..521d129c6 100644
--- a/llama_stack/apis/vector_dbs/vector_dbs.py
+++ b/llama_stack/apis/vector_dbs/vector_dbs.py
@@ -9,6 +9,7 @@ from typing import Literal, Protocol, runtime_checkable
 from pydantic import BaseModel
 
 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
@@ -65,7 +66,7 @@ class ListVectorDBsResponse(BaseModel):
 @runtime_checkable
 @trace_protocol
 class VectorDBs(Protocol):
-    @webmethod(route="/vector-dbs", method="GET")
+    @webmethod(route="/vector-dbs", method="GET", level=LLAMA_STACK_API_V1)
     async def list_vector_dbs(self) -> ListVectorDBsResponse:
         """List all vector databases.
 
@@ -73,7 +74,7 @@ class VectorDBs(Protocol):
         """
         ...
 
-    @webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET")
+    @webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_vector_db(
         self,
         vector_db_id: str,
@@ -85,7 +86,7 @@ class VectorDBs(Protocol):
         """
         ...
 
-    @webmethod(route="/vector-dbs", method="POST")
+    @webmethod(route="/vector-dbs", method="POST", level=LLAMA_STACK_API_V1)
     async def register_vector_db(
         self,
         vector_db_id: str,
@@ -107,7 +108,7 @@ class VectorDBs(Protocol):
         """
         ...
 
-    @webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE")
+    @webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def unregister_vector_db(self, vector_db_id: str) -> None:
         """Unregister a vector database.
 
diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py
index 3e8065cfb..2850863c4 100644
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@@ -15,6 +15,7 @@ from pydantic import BaseModel, Field
 
 from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.vector_dbs import VectorDB
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
 from llama_stack.schema_utils import json_schema_type, webmethod
@@ -437,7 +438,7 @@ class VectorIO(Protocol):
 
     # this will just block now until chunks are inserted, but it should
     # probably return a Job instance which can be polled for completion
-    @webmethod(route="/vector-io/insert", method="POST")
+    @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
     async def insert_chunks(
         self,
         vector_db_id: str,
@@ -455,7 +456,7 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/vector-io/query", method="POST")
+    @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
     async def query_chunks(
         self,
         vector_db_id: str,
@@ -472,7 +473,7 @@ class VectorIO(Protocol):
         ...
 
     # OpenAI Vector Stores API endpoints
-    @webmethod(route="/openai/v1/vector_stores", method="POST")
+    @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_create_vector_store(
         self,
         name: str | None = None,
@@ -498,7 +499,7 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores", method="GET")
+    @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_vector_stores(
         self,
         limit: int | None = 20,
@@ -516,7 +517,7 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET")
+    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_vector_store(
         self,
         vector_store_id: str,
@@ -528,7 +529,7 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST")
+    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_update_vector_store(
         self,
         vector_store_id: str,
@@ -546,7 +547,7 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE")
+    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def openai_delete_vector_store(
         self,
         vector_store_id: str,
@@ -558,7 +559,7 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST")
+    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_search_vector_store(
         self,
         vector_store_id: str,
@@ -584,7 +585,7 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST")
+    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_attach_file_to_vector_store(
         self,
         vector_store_id: str,
@@ -602,7 +603,7 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET")
+    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_files_in_vector_store(
         self,
         vector_store_id: str,
@@ -624,7 +625,9 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET")
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1
+    )
     async def openai_retrieve_vector_store_file(
         self,
         vector_store_id: str,
@@ -638,7 +641,11 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content", method="GET")
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+    )
     async def openai_retrieve_vector_store_file_contents(
         self,
         vector_store_id: str,
@@ -652,7 +659,9 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST")
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1
+    )
     async def openai_update_vector_store_file(
         self,
         vector_store_id: str,
@@ -668,7 +677,9 @@ class VectorIO(Protocol):
         """
         ...
 
-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE")
+    @webmethod(
+        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1
+    )
     async def openai_delete_vector_store_file(
         self,
         vector_store_id: str,
diff --git a/llama_stack/apis/version.py b/llama_stack/apis/version.py
index 53ad6a854..6af039b1f 100644
--- a/llama_stack/apis/version.py
+++ b/llama_stack/apis/version.py
@@ -4,4 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-LLAMA_STACK_API_VERSION = "v1"
+LLAMA_STACK_API_V1 = "v1"  # route prefix for the v1 API level
+LLAMA_STACK_API_V1BETA = "v1beta"  # route prefix for the v1beta API level
+LLAMA_STACK_API_V1ALPHA = "v1alpha"  # route prefix for the v1alpha API level
diff --git a/llama_stack/core/client.py b/llama_stack/core/client.py
index 03e4fb051..49e01794e 100644
--- a/llama_stack/core/client.py
+++ b/llama_stack/core/client.py
@@ -15,7 +15,6 @@ import httpx
 from pydantic import BaseModel, parse_obj_as
 from termcolor import cprint
 
-from llama_stack.apis.version import LLAMA_STACK_API_VERSION
 from llama_stack.providers.datatypes import RemoteProviderConfig
 
 _CLIENT_CLASSES = {}
@@ -114,7 +113,24 @@ def create_api_client_class(protocol) -> type:
                     break
                 kwargs[param.name] = args[i]
 
-            url = f"{self.base_url}/{LLAMA_STACK_API_VERSION}/{webmethod.route.lstrip('/')}"
+            # A method may carry several @webmethod decorators (e.g. one per API
+            # level), so read the whole list rather than a single entry
+            webmethods = getattr(method, "__webmethods__", [])
+
+            if not webmethods:
+                raise RuntimeError(f"Method {method} has no webmethod decorators")
+
+            # Prefer the first entry that is not flagged as deprecated; when every
+            # entry is deprecated, fall back to the first one in the list.
+            preferred_webmethod = next(
+                (wm for wm in webmethods if not getattr(wm, "deprecated", False)),
+                webmethods[0],
+            )
+
+            # URL layout: <base_url>/<level>/<route without leading slashes>
+            api_level = preferred_webmethod.level
+            route_path = preferred_webmethod.route.lstrip("/")
+            url = f"{self.base_url}/{api_level}/{route_path}"
 
             def convert(value):
                 if isinstance(value, list):
diff --git a/llama_stack/core/server/routes.py b/llama_stack/core/server/routes.py
index 7baf20da5..4970d0bf8 100644
--- a/llama_stack/core/server/routes.py
+++ b/llama_stack/core/server/routes.py
@@ -14,7 +14,6 @@ from starlette.routing import Route
 
 from llama_stack.apis.datatypes import Api, ExternalApiSpec
 from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup
-from llama_stack.apis.version import LLAMA_STACK_API_VERSION
 from llama_stack.core.resolver import api_protocol_map
 from llama_stack.schema_utils import WebMethod
 
@@ -54,22 +53,23 @@ def get_all_api_routes(
                     protocol_methods.append((f"{tool_group.value}.{name}", method))
 
         for name, method in protocol_methods:
-            if not hasattr(method, "__webmethod__"):
+            # Get all webmethods for this method (supports multiple decorators)
+            webmethods = getattr(method, "__webmethods__", [])
+            if not webmethods:
                 continue
 
-            # The __webmethod__ attribute is dynamically added by the @webmethod decorator
-            # mypy doesn't know about this dynamic attribute, so we ignore the attr-defined error
-            webmethod = method.__webmethod__  # type: ignore[attr-defined]
-            path = f"/{LLAMA_STACK_API_VERSION}/{webmethod.route.lstrip('/')}"
-            if webmethod.method == hdrs.METH_GET:
-                http_method = hdrs.METH_GET
-            elif webmethod.method == hdrs.METH_DELETE:
-                http_method = hdrs.METH_DELETE
-            else:
-                http_method = hdrs.METH_POST
-            routes.append(
-                (Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod)
-            )  # setting endpoint to None since don't use a Router object
+            # Emit one Route per @webmethod decorator attached to this method
+            for webmethod in webmethods:
+                path = f"/{webmethod.level}/{webmethod.route.lstrip('/')}"
+                # GET and DELETE pass through; every other verb is mapped to POST.
+                # NOTE(review): this includes PUT (e.g. the prompts routes) - confirm intended.
+                if webmethod.method in (hdrs.METH_GET, hdrs.METH_DELETE):
+                    http_method = webmethod.method
+                else:
+                    http_method = hdrs.METH_POST
+                # endpoint stays None since we don't use a Router object
+                api_route = Route(path=path, methods=[http_method], name=name, endpoint=None)
+                routes.append((api_route, webmethod))
 
         apis[api] = routes
 
diff --git a/llama_stack/core/server/tracing.py b/llama_stack/core/server/tracing.py
index c48fc4d33..4c6df5b42 100644
--- a/llama_stack/core/server/tracing.py
+++ b/llama_stack/core/server/tracing.py
@@ -45,6 +45,14 @@ class TracingMiddleware:
             logger.debug(f"No matching route found for path: {path}, falling back to FastAPI")
             return await self.app(scope, receive, send)
 
+        # Log a deprecation warning when the matched route is marked deprecated
+        if getattr(webmethod, "deprecated", False):
+            logger.warning(
+                f"DEPRECATED ROUTE USED: {scope.get('method', 'GET')} {path} - "
+                "This route is deprecated and may be removed in a future version. "
+                "Please check the docs for the supported version."
+            )
+
         trace_attributes = {"__location__": "server", "raw_path": path}
 
         # Extract W3C trace context headers and store as trace attributes
diff --git a/llama_stack/schema_utils.py b/llama_stack/schema_utils.py
index 93382a881..4f8b4edff 100644
--- a/llama_stack/schema_utils.py
+++ b/llama_stack/schema_utils.py
@@ -13,6 +13,7 @@ from .strong_typing.schema import json_schema_type, register_schema  # noqa: F40
 
 @dataclass
 class WebMethod:
+    level: str | None = None
     route: str | None = None
     public: bool = False
     request_examples: list[Any] | None = None
@@ -23,6 +24,7 @@ class WebMethod:
     descriptive_name: str | None = None
     experimental: bool | None = False
     required_scope: str | None = None
+    deprecated: bool | None = False
 
 
 T = TypeVar("T", bound=Callable[..., Any])
@@ -31,6 +33,7 @@ T = TypeVar("T", bound=Callable[..., Any])
 def webmethod(
     route: str | None = None,
     method: str | None = None,
+    level: str | None = None,
     public: bool | None = False,
     request_examples: list[Any] | None = None,
     response_examples: list[Any] | None = None,
@@ -38,6 +41,7 @@ def webmethod(
     descriptive_name: str | None = None,
     experimental: bool | None = False,
     required_scope: str | None = None,
+    deprecated: bool | None = False,
 ) -> Callable[[T], T]:
     """
     Decorator that supplies additional metadata to an endpoint operation function.
@@ -51,9 +55,10 @@ def webmethod(
     """
 
     def wrap(func: T) -> T:
-        func.__webmethod__ = WebMethod(  # type: ignore
+        webmethod_obj = WebMethod(
             route=route,
             method=method,
+            level=level,
             public=public or False,
             request_examples=request_examples,
             response_examples=response_examples,
@@ -61,7 +66,16 @@ def webmethod(
             descriptive_name=descriptive_name,
             experimental=experimental,
             required_scope=required_scope,
+            deprecated=deprecated,
         )
+
+        # Store all webmethods in a list to support multiple decorators
+        if not hasattr(func, "__webmethods__"):
+            func.__webmethods__ = []  # type: ignore
+        func.__webmethods__.append(webmethod_obj)  # type: ignore
+
+        # Keep the last one as __webmethod__ for backwards compatibility
+        func.__webmethod__ = webmethod_obj  # type: ignore
         return func
 
     return wrap
diff --git a/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py b/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py
index de1427bfd..e97a9d8fb 100644
--- a/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py
+++ b/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py
@@ -6,6 +6,7 @@
 
 from typing import Protocol
 
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.datatypes import Api, ProviderSpec, RemoteProviderSpec
 from llama_stack.schema_utils import webmethod
 
@@ -28,7 +29,7 @@ class WeatherProvider(Protocol):
     A protocol for the Weather API.
     """
 
-    @webmethod(route="/weather/locations", method="GET")
+    @webmethod(route="/weather/locations", method="GET", level=LLAMA_STACK_API_V1)
     async def get_available_locations() -> dict[str, list[str]]:
         """
         Get the available locations.