feat(api): level inference/rerank and batch. remove experimental

inference/rerank is the one route in the API that is not intended to be deprecated. Level it as v1alpha. Additionally, remove `experimental` and instead use `v1alpha`, which itself implies an experimental state based on the original proposal.

Signed-off-by: Charlie Doern <cdoern@redhat.com>
2025-10-04 04:04:14 +00:00 · 2025-09-26 12:52:53 -04:00 · 2025-09-26 12:52:53 -04:00 · d09c5c5608
commit d09c5c5608
parent 975ead1d6a
5 changed files with 13 additions and 10 deletions
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -21,7 +21,7 @@ from llama_stack.apis.common.content_types import ContentDelta, InterleavedConte
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.models import Model
 from llama_stack.apis.telemetry import MetricResponseMixin
-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack.models.llama.datatypes import (
    BuiltinTool,
    StopReason,
@@ -1070,7 +1070,7 @@ class InferenceProvider(Protocol):
        """
        ...

-    @webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
+    @webmethod(route="/inference/rerank", method="POST", level=LLAMA_STACK_API_V1ALPHA)
    async def rerank(
        self,
        model: str,