feat: add batch inference API to llama stack inference

This commit is contained in:
Ashwin Bharambe 2025-04-08 13:50:52 -07:00
parent ed58a94b30
commit 0cfb2e2473
24 changed files with 1041 additions and 377 deletions

View file

@@ -20,6 +20,7 @@ class WebMethod:
raw_bytes_request_body: Optional[bool] = False
# A descriptive name of the corresponding span created by tracing
descriptive_name: Optional[str] = None
experimental: Optional[bool] = False
T = TypeVar("T", bound=Callable[..., Any])
@@ -33,6 +34,7 @@ def webmethod(
response_examples: Optional[List[Any]] = None,
raw_bytes_request_body: Optional[bool] = False,
descriptive_name: Optional[str] = None,
experimental: Optional[bool] = False,
) -> Callable[[T], T]:
"""
Decorator that supplies additional metadata to an endpoint operation function.
@@ -52,6 +54,7 @@ def webmethod(
response_examples=response_examples,
raw_bytes_request_body=raw_bytes_request_body,
descriptive_name=descriptive_name,
experimental=experimental,
)
return func