mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-03 19:57:35 +00:00
feat(api): level inference/rerank and batch. remove experimental
inference/rerank is the one route in the API intended to not be deprecated. Level it as v1alpha. Additionally, remove `experimental` and opt to instead use `v1alpha`, which itself implies an experimental state based on the original proposal. Signed-off-by: Charlie Doern <cdoern@redhat.com>
This commit is contained in:
parent
975ead1d6a
commit
d09c5c5608
5 changed files with 13 additions and 10 deletions
2
docs/static/llama-stack-spec.html
vendored
2
docs/static/llama-stack-spec.html
vendored
|
@ -5431,7 +5431,7 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/inference/rerank": {
|
||||
"/v1alpha/inference/rerank": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
|
2
docs/static/llama-stack-spec.yaml
vendored
2
docs/static/llama-stack-spec.yaml
vendored
|
@ -3895,7 +3895,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/QueryTracesRequest'
|
||||
required: true
|
||||
/v1/inference/rerank:
|
||||
/v1alpha/inference/rerank:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
|
|
|
@ -21,7 +21,7 @@ from llama_stack.apis.common.content_types import ContentDelta, InterleavedConte
|
|||
from llama_stack.apis.common.responses import Order
|
||||
from llama_stack.apis.models import Model
|
||||
from llama_stack.apis.telemetry import MetricResponseMixin
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.models.llama.datatypes import (
|
||||
BuiltinTool,
|
||||
StopReason,
|
||||
|
@ -1070,7 +1070,7 @@ class InferenceProvider(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
|
||||
@webmethod(route="/inference/rerank", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def rerank(
|
||||
self,
|
||||
model: str,
|
||||
|
|
|
@ -29,6 +29,7 @@ from llama_stack.apis.telemetry import Telemetry
|
|||
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
||||
from llama_stack.apis.vector_dbs import VectorDBs
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.core.client import get_client_impl
|
||||
from llama_stack.core.datatypes import (
|
||||
AccessRule,
|
||||
|
@ -412,8 +413,14 @@ def check_protocol_compliance(obj: Any, protocol: Any) -> None:
|
|||
|
||||
mro = type(obj).__mro__
|
||||
for name, value in inspect.getmembers(protocol):
|
||||
if inspect.isfunction(value) and hasattr(value, "__webmethod__"):
|
||||
if value.__webmethod__.experimental:
|
||||
if inspect.isfunction(value) and hasattr(value, "__webmethods__"):
|
||||
has_alpha_api = False
|
||||
for webmethod in value.__webmethods__:
|
||||
if webmethod.level == LLAMA_STACK_API_V1ALPHA:
|
||||
has_alpha_api = True
|
||||
break
|
||||
# if this API has multiple webmethods, and one of them is an alpha API, this API should be skipped when checking for missing or not callable routes
|
||||
if has_alpha_api:
|
||||
continue
|
||||
if not hasattr(obj, name):
|
||||
missing_methods.append((name, "missing"))
|
||||
|
|
|
@ -22,7 +22,6 @@ class WebMethod:
|
|||
raw_bytes_request_body: bool | None = False
|
||||
# A descriptive name of the corresponding span created by tracing
|
||||
descriptive_name: str | None = None
|
||||
experimental: bool | None = False
|
||||
required_scope: str | None = None
|
||||
deprecated: bool | None = False
|
||||
|
||||
|
@ -39,7 +38,6 @@ def webmethod(
|
|||
response_examples: list[Any] | None = None,
|
||||
raw_bytes_request_body: bool | None = False,
|
||||
descriptive_name: str | None = None,
|
||||
experimental: bool | None = False,
|
||||
required_scope: str | None = None,
|
||||
deprecated: bool | None = False,
|
||||
) -> Callable[[T], T]:
|
||||
|
@ -50,7 +48,6 @@ def webmethod(
|
|||
:param public: True if the operation can be invoked without prior authentication.
|
||||
:param request_examples: Sample requests that the operation might take. Pass a list of objects, not JSON.
|
||||
:param response_examples: Sample responses that the operation might produce. Pass a list of objects, not JSON.
|
||||
:param experimental: True if the operation is experimental and subject to change.
|
||||
:param required_scope: Required scope for this endpoint (e.g., 'monitoring.viewer').
|
||||
"""
|
||||
|
||||
|
@ -64,7 +61,6 @@ def webmethod(
|
|||
response_examples=response_examples,
|
||||
raw_bytes_request_body=raw_bytes_request_body,
|
||||
descriptive_name=descriptive_name,
|
||||
experimental=experimental,
|
||||
required_scope=required_scope,
|
||||
deprecated=deprecated,
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue