From 8095602697e73eb9c70970180d7fc30e1d97414c Mon Sep 17 00:00:00 2001
From: Charlie Doern <cdoern@redhat.com>
Date: Fri, 12 Sep 2025 13:23:57 -0400
Subject: [PATCH] feat: introduce API leveling, post_training to v1alpha

Rather than have a single `LLAMA_STACK_API_VERSION` constant, we need separate `LLAMA_STACK_API_V1`, `LLAMA_STACK_API_V1ALPHA`, and `LLAMA_STACK_API_V1BETA` constants.

This also necessitated adding a `level` field to `WebMethod` so that routing can be handled properly.

Move post_training to `v1alpha`, as it is under heavy development and not near its final state.

Signed-off-by: Charlie Doern <cdoern@redhat.com>
---
 docs/_static/llama-stack-spec.html              | 12 ++++++------
 docs/_static/llama-stack-spec.yaml              | 12 ++++++------
 docs/openapi_generator/generate.py              |  6 +++---
 docs/openapi_generator/pyopenapi/operations.py  | 10 +++++++---
 llama_stack/apis/post_training/post_training.py | 13 +++++++------
 llama_stack/apis/version.py                     |  4 +++-
 llama_stack/core/client.py                      |  3 +--
 llama_stack/core/server/routes.py               |  3 +--
 llama_stack/schema_utils.py                     |  3 +++
 9 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 9ddb070d7..63c68d835 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -172,7 +172,7 @@
                 }
             }
         },
-        "/v1/post-training/job/cancel": {
+        "/v1alpha/post-training/job/cancel": {
             "post": {
                 "responses": {
                     "200": {
@@ -2035,7 +2035,7 @@
                 ]
             }
         },
-        "/v1/post-training/job/artifacts": {
+        "/v1alpha/post-training/job/artifacts": {
             "get": {
                 "responses": {
                     "200": {
@@ -2078,7 +2078,7 @@
                 ]
             }
         },
-        "/v1/post-training/job/status": {
+        "/v1alpha/post-training/job/status": {
             "get": {
                 "responses": {
                     "200": {
@@ -2121,7 +2121,7 @@
                 ]
             }
         },
-        "/v1/post-training/jobs": {
+        "/v1alpha/post-training/jobs": {
             "get": {
                 "responses": {
                     "200": {
@@ -4681,7 +4681,7 @@
                 }
             }
         },
-        "/v1/post-training/preference-optimize": {
+        "/v1alpha/post-training/preference-optimize": {
             "post": {
                 "responses": {
                     "200": {
@@ -5382,7 +5382,7 @@
                 }
             }
         },
-        "/v1/post-training/supervised-fine-tune": {
+        "/v1alpha/post-training/supervised-fine-tune": {
             "post": {
                 "responses": {
                     "200": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 94dc5c0f9..ea7d3248d 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -104,7 +104,7 @@ paths:
             schema:
               $ref: '#/components/schemas/BatchCompletionRequest'
         required: true
-  /v1/post-training/job/cancel:
+  /v1alpha/post-training/job/cancel:
     post:
       responses:
         '200':
@@ -1404,7 +1404,7 @@ paths:
           required: true
           schema:
             type: string
-  /v1/post-training/job/artifacts:
+  /v1alpha/post-training/job/artifacts:
     get:
       responses:
         '200':
@@ -1434,7 +1434,7 @@ paths:
           required: true
           schema:
             type: string
-  /v1/post-training/job/status:
+  /v1alpha/post-training/job/status:
     get:
       responses:
         '200':
@@ -1464,7 +1464,7 @@ paths:
           required: true
           schema:
             type: string
-  /v1/post-training/jobs:
+  /v1alpha/post-training/jobs:
     get:
       responses:
         '200':
@@ -3325,7 +3325,7 @@ paths:
             schema:
               $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest'
         required: true
-  /v1/post-training/preference-optimize:
+  /v1alpha/post-training/preference-optimize:
     post:
       responses:
         '200':
@@ -3812,7 +3812,7 @@ paths:
             schema:
               $ref: '#/components/schemas/SetDefaultVersionRequest'
         required: true
-  /v1/post-training/supervised-fine-tune:
+  /v1alpha/post-training/supervised-fine-tune:
     post:
       responses:
         '200':
diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index c27bc6440..e3c88e954 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -16,7 +16,7 @@ import sys
 import fire
 import ruamel.yaml as yaml
 
-from llama_stack.apis.version import LLAMA_STACK_API_VERSION  # noqa: E402
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA, LLAMA_STACK_API_V1BETA  # noqa: E402
 from llama_stack.core.stack import LlamaStack  # noqa: E402
 
 from .pyopenapi.options import Options  # noqa: E402
@@ -25,7 +25,7 @@ from .pyopenapi.utility import Specification, validate_api  # noqa: E402
 
 
 def str_presenter(dumper, data):
-    if data.startswith(f"/{LLAMA_STACK_API_VERSION}") or data.startswith(
+    if data.startswith(f"/{LLAMA_STACK_API_V1}") or data.startswith(f"/{LLAMA_STACK_API_V1ALPHA}") or data.startswith(f"/{LLAMA_STACK_API_V1BETA}") or data.startswith(
         "#/components/schemas/"
     ):
         style = None
@@ -58,7 +58,7 @@ def main(output_dir: str):
             server=Server(url="http://any-hosted-llama-stack.com"),
             info=Info(
                 title="Llama Stack Specification",
-                version=LLAMA_STACK_API_VERSION,
+                version=LLAMA_STACK_API_V1,
                 description="""This is the specification of the Llama Stack that provides
                 a set of endpoints and their corresponding interfaces that are tailored to
                 best leverage Llama Models.""",
diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py
index 045e33848..e1f675e42 100644
--- a/docs/openapi_generator/pyopenapi/operations.py
+++ b/docs/openapi_generator/pyopenapi/operations.py
@@ -11,7 +11,7 @@ import typing
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union
 
-from llama_stack.apis.version import LLAMA_STACK_API_VERSION
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA
 
 from termcolor import colored
 
@@ -114,10 +114,14 @@ class EndpointOperation:
     response_examples: Optional[List[Any]] = None
 
     def get_route(self) -> str:
+        # Get the API level from the webmethod decorator
+        webmethod = getattr(self.func_ref, "__webmethod__", None)
+        api_level = webmethod.level if webmethod and hasattr(webmethod, 'level') else LLAMA_STACK_API_V1
+        
         if self.route is not None:
-            return "/".join(["", LLAMA_STACK_API_VERSION, self.route.lstrip("/")])
+            return "/".join(["", api_level, self.route.lstrip("/")])
 
-        route_parts = ["", LLAMA_STACK_API_VERSION, self.name]
+        route_parts = ["", api_level, self.name]
         for param_name, _ in self.path_params:
             route_parts.append("{" + param_name + "}")
         return "/".join(route_parts)
diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py
index c16221289..2b7a6222f 100644
--- a/llama_stack/apis/post_training/post_training.py
+++ b/llama_stack/apis/post_training/post_training.py
@@ -13,6 +13,7 @@ from pydantic import BaseModel, Field
 from llama_stack.apis.common.content_types import URL
 from llama_stack.apis.common.job_types import JobStatus
 from llama_stack.apis.common.training_types import Checkpoint
+from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 
@@ -283,7 +284,7 @@ class PostTrainingJobArtifactsResponse(BaseModel):
 
 
 class PostTraining(Protocol):
-    @webmethod(route="/post-training/supervised-fine-tune", method="POST")
+    @webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def supervised_fine_tune(
         self,
         job_uuid: str,
@@ -310,7 +311,7 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/preference-optimize", method="POST")
+    @webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def preference_optimize(
         self,
         job_uuid: str,
@@ -332,7 +333,7 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/jobs", method="GET")
+    @webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
         """Get all training jobs.
 
@@ -340,7 +341,7 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/job/status", method="GET")
+    @webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse:
         """Get the status of a training job.
 
@@ -349,7 +350,7 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/job/cancel", method="POST")
+    @webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def cancel_training_job(self, job_uuid: str) -> None:
         """Cancel a training job.
 
@@ -357,7 +358,7 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/job/artifacts", method="GET")
+    @webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
         """Get the artifacts of a training job.
 
diff --git a/llama_stack/apis/version.py b/llama_stack/apis/version.py
index 53ad6a854..6af039b1f 100644
--- a/llama_stack/apis/version.py
+++ b/llama_stack/apis/version.py
@@ -4,4 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-LLAMA_STACK_API_VERSION = "v1"
+LLAMA_STACK_API_V1 = "v1"
+LLAMA_STACK_API_V1BETA = "v1beta"
+LLAMA_STACK_API_V1ALPHA = "v1alpha"
diff --git a/llama_stack/core/client.py b/llama_stack/core/client.py
index 03e4fb051..2c79a5f3e 100644
--- a/llama_stack/core/client.py
+++ b/llama_stack/core/client.py
@@ -15,7 +15,6 @@ import httpx
 from pydantic import BaseModel, parse_obj_as
 from termcolor import cprint
 
-from llama_stack.apis.version import LLAMA_STACK_API_VERSION
 from llama_stack.providers.datatypes import RemoteProviderConfig
 
 _CLIENT_CLASSES = {}
@@ -114,7 +113,7 @@ def create_api_client_class(protocol) -> type:
                     break
                 kwargs[param.name] = args[i]
 
-            url = f"{self.base_url}/{LLAMA_STACK_API_VERSION}/{webmethod.route.lstrip('/')}"
+            url = f"{self.base_url}/{webmethod.level}/{webmethod.route.lstrip('/')}"
 
             def convert(value):
                 if isinstance(value, list):
diff --git a/llama_stack/core/server/routes.py b/llama_stack/core/server/routes.py
index 7baf20da5..c23865a8c 100644
--- a/llama_stack/core/server/routes.py
+++ b/llama_stack/core/server/routes.py
@@ -14,7 +14,6 @@ from starlette.routing import Route
 
 from llama_stack.apis.datatypes import Api, ExternalApiSpec
 from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup
-from llama_stack.apis.version import LLAMA_STACK_API_VERSION
 from llama_stack.core.resolver import api_protocol_map
 from llama_stack.schema_utils import WebMethod
 
@@ -60,7 +59,7 @@ def get_all_api_routes(
             # The __webmethod__ attribute is dynamically added by the @webmethod decorator
             # mypy doesn't know about this dynamic attribute, so we ignore the attr-defined error
             webmethod = method.__webmethod__  # type: ignore[attr-defined]
-            path = f"/{LLAMA_STACK_API_VERSION}/{webmethod.route.lstrip('/')}"
+            path = f"/{webmethod.level}/{webmethod.route.lstrip('/')}"
             if webmethod.method == hdrs.METH_GET:
                 http_method = hdrs.METH_GET
             elif webmethod.method == hdrs.METH_DELETE:
diff --git a/llama_stack/schema_utils.py b/llama_stack/schema_utils.py
index 93382a881..625b19a08 100644
--- a/llama_stack/schema_utils.py
+++ b/llama_stack/schema_utils.py
@@ -13,6 +13,7 @@ from .strong_typing.schema import json_schema_type, register_schema  # noqa: F40
 
 @dataclass
 class WebMethod:
+    level: str | None = "v1"
     route: str | None = None
     public: bool = False
     request_examples: list[Any] | None = None
@@ -31,6 +32,7 @@ T = TypeVar("T", bound=Callable[..., Any])
 def webmethod(
     route: str | None = None,
     method: str | None = None,
+    level: str | None = "v1",
     public: bool | None = False,
     request_examples: list[Any] | None = None,
     response_examples: list[Any] | None = None,
@@ -54,6 +56,7 @@ def webmethod(
         func.__webmethod__ = WebMethod(  # type: ignore
             route=route,
             method=method,
+            level=level,
             public=public or False,
             request_examples=request_examples,
             response_examples=response_examples,