feat: add batch inference API to llama stack inference

2025-12-31 04:43:52 +00:00 · 2025-04-08 13:50:52 -07:00 · 2025-04-08 13:50:52 -07:00 · 0cfb2e2473
commit 0cfb2e2473
parent ed58a94b30
24 changed files with 1041 additions and 377 deletions
--- a/llama_stack/schema_utils.py
+++ b/llama_stack/schema_utils.py
@@ -20,6 +20,7 @@ class WebMethod:
    raw_bytes_request_body: Optional[bool] = False
    # A descriptive name of the corresponding span created by tracing
    descriptive_name: Optional[str] = None
+    experimental: Optional[bool] = False


 T = TypeVar("T", bound=Callable[..., Any])
@@ -33,6 +34,7 @@ def webmethod(
    response_examples: Optional[List[Any]] = None,
    raw_bytes_request_body: Optional[bool] = False,
    descriptive_name: Optional[str] = None,
+    experimental: Optional[bool] = False,
 ) -> Callable[[T], T]:
    """
    Decorator that supplies additional metadata to an endpoint operation function.
@@ -52,6 +54,7 @@ def webmethod(
            response_examples=response_examples,
            raw_bytes_request_body=raw_bytes_request_body,
            descriptive_name=descriptive_name,
+            experimental=experimental,
        )
        return func