From aac42ddcc2832133873ba1b7cd1d74996e21564a Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Mon, 29 Sep 2025 15:42:09 -0400 Subject: [PATCH] feat(api): level inference/rerank and remove experimental (#3565) # What does this PR do? inference/rerank is the one route in the API that is intended not to be deprecated. Level it as v1alpha. Additionally, remove `experimental` and use `v1alpha` instead, which itself implies an experimental state based on the original proposal. Signed-off-by: Charlie Doern --- docs/static/llama-stack-spec.html | 2 +- docs/static/llama-stack-spec.yaml | 2 +- llama_stack/apis/inference/inference.py | 4 ++-- llama_stack/core/resolver.py | 11 +++++++++-- llama_stack/schema_utils.py | 4 ---- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 7845fb068..32ead1764 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -5431,7 +5431,7 @@ } } }, - "/v1/inference/rerank": { + "/v1alpha/inference/rerank": { "post": { "responses": { "200": { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 8cbbccaa2..3b5b92060 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -3895,7 +3895,7 @@ paths: schema: $ref: '#/components/schemas/QueryTracesRequest' required: true - /v1/inference/rerank: + /v1alpha/inference/rerank: post: responses: '200': diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index c6a4e4f60..134da5bf8 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -21,7 +21,7 @@ from llama_stack.apis.common.content_types import ContentDelta, InterleavedConte from llama_stack.apis.common.responses import Order from llama_stack.apis.models import Model from llama_stack.apis.telemetry import MetricResponseMixin -from llama_stack.apis.version import LLAMA_STACK_API_V1 
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA from llama_stack.models.llama.datatypes import ( BuiltinTool, StopReason, @@ -1070,7 +1070,7 @@ class InferenceProvider(Protocol): """ ... - @webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1) + @webmethod(route="/inference/rerank", method="POST", level=LLAMA_STACK_API_V1ALPHA) async def rerank( self, model: str, diff --git a/llama_stack/core/resolver.py b/llama_stack/core/resolver.py index 373446de6..f421c47ed 100644 --- a/llama_stack/core/resolver.py +++ b/llama_stack/core/resolver.py @@ -29,6 +29,7 @@ from llama_stack.apis.telemetry import Telemetry from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.vector_dbs import VectorDBs from llama_stack.apis.vector_io import VectorIO +from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA from llama_stack.core.client import get_client_impl from llama_stack.core.datatypes import ( AccessRule, @@ -412,8 +413,14 @@ def check_protocol_compliance(obj: Any, protocol: Any) -> None: mro = type(obj).__mro__ for name, value in inspect.getmembers(protocol): - if inspect.isfunction(value) and hasattr(value, "__webmethod__"): - if value.__webmethod__.experimental: + if inspect.isfunction(value) and hasattr(value, "__webmethods__"): + has_alpha_api = False + for webmethod in value.__webmethods__: + if webmethod.level == LLAMA_STACK_API_V1ALPHA: + has_alpha_api = True + break + # if this API has multiple webmethods, and one of them is an alpha API, this API should be skipped when checking for missing or not callable routes + if has_alpha_api: continue if not hasattr(obj, name): missing_methods.append((name, "missing")) diff --git a/llama_stack/schema_utils.py b/llama_stack/schema_utils.py index 4f8b4edff..c58fcdd01 100644 --- a/llama_stack/schema_utils.py +++ b/llama_stack/schema_utils.py @@ -22,7 +22,6 @@ class WebMethod: raw_bytes_request_body: bool | None = False # A 
descriptive name of the corresponding span created by tracing descriptive_name: str | None = None - experimental: bool | None = False required_scope: str | None = None deprecated: bool | None = False @@ -39,7 +38,6 @@ def webmethod( response_examples: list[Any] | None = None, raw_bytes_request_body: bool | None = False, descriptive_name: str | None = None, - experimental: bool | None = False, required_scope: str | None = None, deprecated: bool | None = False, ) -> Callable[[T], T]: @@ -50,7 +48,6 @@ def webmethod( :param public: True if the operation can be invoked without prior authentication. :param request_examples: Sample requests that the operation might take. Pass a list of objects, not JSON. :param response_examples: Sample responses that the operation might produce. Pass a list of objects, not JSON. - :param experimental: True if the operation is experimental and subject to change. :param required_scope: Required scope for this endpoint (e.g., 'monitoring.viewer'). """ @@ -64,7 +61,6 @@ def webmethod( response_examples=response_examples, raw_bytes_request_body=raw_bytes_request_body, descriptive_name=descriptive_name, - experimental=experimental, required_scope=required_scope, deprecated=deprecated, )