feat(api): level inference/rerank and remove experimental (#3565)

# What does this PR do?

`inference/rerank` is the one route in the API that is intended not to be
deprecated. Level it as `v1alpha`.

Additionally, remove `experimental` and instead use `v1alpha`, which
itself implies an experimental state according to the original
proposal.

Signed-off-by: Charlie Doern <cdoern@redhat.com>
This commit is contained in:
Charlie Doern 2025-09-29 15:42:09 -04:00 committed by GitHub
parent 975ead1d6a
commit aac42ddcc2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 13 additions and 10 deletions

View file

@ -5431,7 +5431,7 @@
} }
} }
}, },
"/v1/inference/rerank": { "/v1alpha/inference/rerank": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {

View file

@ -3895,7 +3895,7 @@ paths:
schema: schema:
$ref: '#/components/schemas/QueryTracesRequest' $ref: '#/components/schemas/QueryTracesRequest'
required: true required: true
/v1/inference/rerank: /v1alpha/inference/rerank:
post: post:
responses: responses:
'200': '200':

View file

@ -21,7 +21,7 @@ from llama_stack.apis.common.content_types import ContentDelta, InterleavedConte
from llama_stack.apis.common.responses import Order from llama_stack.apis.common.responses import Order
from llama_stack.apis.models import Model from llama_stack.apis.models import Model
from llama_stack.apis.telemetry import MetricResponseMixin from llama_stack.apis.telemetry import MetricResponseMixin
from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.models.llama.datatypes import ( from llama_stack.models.llama.datatypes import (
BuiltinTool, BuiltinTool,
StopReason, StopReason,
@ -1070,7 +1070,7 @@ class InferenceProvider(Protocol):
""" """
... ...
@webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1) @webmethod(route="/inference/rerank", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def rerank( async def rerank(
self, self,
model: str, model: str,

View file

@ -29,6 +29,7 @@ from llama_stack.apis.telemetry import Telemetry
from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDBs from llama_stack.apis.vector_dbs import VectorDBs
from llama_stack.apis.vector_io import VectorIO from llama_stack.apis.vector_io import VectorIO
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
from llama_stack.core.client import get_client_impl from llama_stack.core.client import get_client_impl
from llama_stack.core.datatypes import ( from llama_stack.core.datatypes import (
AccessRule, AccessRule,
@ -412,8 +413,14 @@ def check_protocol_compliance(obj: Any, protocol: Any) -> None:
mro = type(obj).__mro__ mro = type(obj).__mro__
for name, value in inspect.getmembers(protocol): for name, value in inspect.getmembers(protocol):
if inspect.isfunction(value) and hasattr(value, "__webmethod__"): if inspect.isfunction(value) and hasattr(value, "__webmethods__"):
if value.__webmethod__.experimental: has_alpha_api = False
for webmethod in value.__webmethods__:
if webmethod.level == LLAMA_STACK_API_V1ALPHA:
has_alpha_api = True
break
# if this API has multiple webmethods, and one of them is an alpha API, this API should be skipped when checking for missing or not callable routes
if has_alpha_api:
continue continue
if not hasattr(obj, name): if not hasattr(obj, name):
missing_methods.append((name, "missing")) missing_methods.append((name, "missing"))

View file

@ -22,7 +22,6 @@ class WebMethod:
raw_bytes_request_body: bool | None = False raw_bytes_request_body: bool | None = False
# A descriptive name of the corresponding span created by tracing # A descriptive name of the corresponding span created by tracing
descriptive_name: str | None = None descriptive_name: str | None = None
experimental: bool | None = False
required_scope: str | None = None required_scope: str | None = None
deprecated: bool | None = False deprecated: bool | None = False
@ -39,7 +38,6 @@ def webmethod(
response_examples: list[Any] | None = None, response_examples: list[Any] | None = None,
raw_bytes_request_body: bool | None = False, raw_bytes_request_body: bool | None = False,
descriptive_name: str | None = None, descriptive_name: str | None = None,
experimental: bool | None = False,
required_scope: str | None = None, required_scope: str | None = None,
deprecated: bool | None = False, deprecated: bool | None = False,
) -> Callable[[T], T]: ) -> Callable[[T], T]:
@ -50,7 +48,6 @@ def webmethod(
:param public: True if the operation can be invoked without prior authentication. :param public: True if the operation can be invoked without prior authentication.
:param request_examples: Sample requests that the operation might take. Pass a list of objects, not JSON. :param request_examples: Sample requests that the operation might take. Pass a list of objects, not JSON.
:param response_examples: Sample responses that the operation might produce. Pass a list of objects, not JSON. :param response_examples: Sample responses that the operation might produce. Pass a list of objects, not JSON.
:param experimental: True if the operation is experimental and subject to change.
:param required_scope: Required scope for this endpoint (e.g., 'monitoring.viewer'). :param required_scope: Required scope for this endpoint (e.g., 'monitoring.viewer').
""" """
@ -64,7 +61,6 @@ def webmethod(
response_examples=response_examples, response_examples=response_examples,
raw_bytes_request_body=raw_bytes_request_body, raw_bytes_request_body=raw_bytes_request_body,
descriptive_name=descriptive_name, descriptive_name=descriptive_name,
experimental=experimental,
required_scope=required_scope, required_scope=required_scope,
deprecated=deprecated, deprecated=deprecated,
) )