feat: introduce API leveling, post_training, eval to v1alpha (#3449)

# What does this PR do?

Rather than have a single `LLAMA_STACK_VERSION`, we need to have a
`_V1`, `_V1ALPHA`, and `_V1BETA` constant.

This also necessitated addition of `level` to the `WebMethod` so that
routing can be handled properly.


For backwards compat, the `v1` routes are being kept around and marked
as `deprecated`. When used, the server will log a deprecation warning.

Deprecation log:

<img width="1224" height="134" alt="Screenshot 2025-09-25 at 2 43 36 PM"
src="https://github.com/user-attachments/assets/0cc7c245-dafc-48f0-be99-269fb9a686f9"
/>

move:
1. post_training to `v1alpha` as it is under heavy development and not
near its final state
2. eval: job scheduling is not implemented. It relies heavily on the
datasetio API, which is under development and missing implementations of
specific routes, indicating the structure of those routes might change.
Additionally eval depends on the `inference` API which is going to be
deprecated, eval will likely need a major API surface change to conform
to using completions properly

implements leveling in #3317 

note: integration tests will fail until the SDK is regenerated with
v1alpha/inference as opposed to v1/inference

## Test Plan

existing tests should pass with newly generated schema. Conformance will
also pass as these routes are not the ones we currently test for
stability

Signed-off-by: Charlie Doern <cdoern@redhat.com>
This commit is contained in:
Charlie Doern 2025-09-26 10:18:07 -04:00 committed by GitHub
parent 65e01b5684
commit c88c4ff2c6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 1507 additions and 260 deletions

View file

@ -16,7 +16,7 @@ import sys
import fire import fire
import ruamel.yaml as yaml import ruamel.yaml as yaml
from llama_stack.apis.version import LLAMA_STACK_API_VERSION # noqa: E402 from llama_stack.apis.version import LLAMA_STACK_API_V1 # noqa: E402
from llama_stack.core.stack import LlamaStack # noqa: E402 from llama_stack.core.stack import LlamaStack # noqa: E402
from .pyopenapi.options import Options # noqa: E402 from .pyopenapi.options import Options # noqa: E402
@ -25,7 +25,7 @@ from .pyopenapi.utility import Specification, validate_api # noqa: E402
def str_presenter(dumper, data): def str_presenter(dumper, data):
if data.startswith(f"/{LLAMA_STACK_API_VERSION}") or data.startswith( if data.startswith(f"/{LLAMA_STACK_API_V1}") or data.startswith(
"#/components/schemas/" "#/components/schemas/"
): ):
style = None style = None
@ -58,7 +58,7 @@ def main(output_dir: str):
server=Server(url="http://any-hosted-llama-stack.com"), server=Server(url="http://any-hosted-llama-stack.com"),
info=Info( info=Info(
title="Llama Stack Specification", title="Llama Stack Specification",
version=LLAMA_STACK_API_VERSION, version=LLAMA_STACK_API_V1,
description="""This is the specification of the Llama Stack that provides description="""This is the specification of the Llama Stack that provides
a set of endpoints and their corresponding interfaces that are tailored to a set of endpoints and their corresponding interfaces that are tailored to
best leverage Llama Models.""", best leverage Llama Models.""",

View file

@ -829,7 +829,7 @@ class Generator:
else: else:
raise NotImplementedError(f"unknown HTTP method: {op.http_method}") raise NotImplementedError(f"unknown HTTP method: {op.http_method}")
route = op.get_route() route = op.get_route(op.webmethod)
route = route.replace(":path", "") route = route.replace(":path", "")
print(f"route: {route}") print(f"route: {route}")
if route in paths: if route in paths:

View file

@ -11,7 +11,7 @@ import typing
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union
from llama_stack.apis.version import LLAMA_STACK_API_VERSION from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA
from termcolor import colored from termcolor import colored
@ -113,11 +113,13 @@ class EndpointOperation:
request_examples: Optional[List[Any]] = None request_examples: Optional[List[Any]] = None
response_examples: Optional[List[Any]] = None response_examples: Optional[List[Any]] = None
def get_route(self) -> str: def get_route(self, webmethod) -> str:
if self.route is not None: api_level = webmethod.level
return "/".join(["", LLAMA_STACK_API_VERSION, self.route.lstrip("/")])
route_parts = ["", LLAMA_STACK_API_VERSION, self.name] if self.route is not None:
return "/".join(["", api_level, self.route.lstrip("/")])
route_parts = ["", api_level, self.name]
for param_name, _ in self.path_params: for param_name, _ in self.path_params:
route_parts.append("{" + param_name + "}") route_parts.append("{" + param_name + "}")
return "/".join(route_parts) return "/".join(route_parts)
@ -152,33 +154,39 @@ def _get_endpoint_functions(
functions = inspect.getmembers(endpoint, inspect.isfunction) functions = inspect.getmembers(endpoint, inspect.isfunction)
for func_name, func_ref in functions: for func_name, func_ref in functions:
webmethod = getattr(func_ref, "__webmethod__", None) webmethods = []
if not webmethod:
# Check for multiple webmethods (stacked decorators)
if hasattr(func_ref, "__webmethods__"):
webmethods = func_ref.__webmethods__
if not webmethods:
continue continue
print(f"Processing {colored(func_name, 'white')}...") for webmethod in webmethods:
operation_name = func_name print(f"Processing {colored(func_name, 'white')}...")
operation_name = func_name
if webmethod.method == "GET": if webmethod.method == "GET":
prefix = "get" prefix = "get"
elif webmethod.method == "DELETE": elif webmethod.method == "DELETE":
prefix = "delete" prefix = "delete"
elif webmethod.method == "POST": elif webmethod.method == "POST":
prefix = "post" prefix = "post"
elif operation_name.startswith("get_") or operation_name.endswith("/get"): elif operation_name.startswith("get_") or operation_name.endswith("/get"):
prefix = "get" prefix = "get"
elif ( elif (
operation_name.startswith("delete_") operation_name.startswith("delete_")
or operation_name.startswith("remove_") or operation_name.startswith("remove_")
or operation_name.endswith("/delete") or operation_name.endswith("/delete")
or operation_name.endswith("/remove") or operation_name.endswith("/remove")
): ):
prefix = "delete" prefix = "delete"
else: else:
# by default everything else is a POST # by default everything else is a POST
prefix = "post" prefix = "post"
yield prefix, operation_name, func_name, func_ref yield prefix, operation_name, func_name, func_ref
def _get_defining_class(member_fn: str, derived_cls: type) -> type: def _get_defining_class(member_fn: str, derived_cls: type) -> type:
@ -239,105 +247,101 @@ def get_endpoint_operations(
"update", "update",
], ],
): ):
# extract routing information from function metadata # Get all webmethods for this function
webmethod = getattr(func_ref, "__webmethod__", None) webmethods = getattr(func_ref, "__webmethods__", [])
if webmethod is not None:
# Create one EndpointOperation for each webmethod
for webmethod in webmethods:
route = webmethod.route route = webmethod.route
route_params = _get_route_parameters(route) if route is not None else None route_params = _get_route_parameters(route) if route is not None else None
public = webmethod.public public = webmethod.public
request_examples = webmethod.request_examples request_examples = webmethod.request_examples
response_examples = webmethod.response_examples response_examples = webmethod.response_examples
else:
route = None
route_params = None
public = False
request_examples = None
response_examples = None
# inspect function signature for path and query parameters, and request/response payload type # inspect function signature for path and query parameters, and request/response payload type
signature = get_signature(func_ref) signature = get_signature(func_ref)
path_params = [] path_params = []
query_params = [] query_params = []
request_params = [] request_params = []
multipart_params = [] multipart_params = []
for param_name, parameter in signature.parameters.items(): for param_name, parameter in signature.parameters.items():
param_type = _get_annotation_type(parameter.annotation, func_ref) param_type = _get_annotation_type(parameter.annotation, func_ref)
# omit "self" for instance methods # omit "self" for instance methods
if param_name == "self" and param_type is inspect.Parameter.empty: if param_name == "self" and param_type is inspect.Parameter.empty:
continue continue
# check if all parameters have explicit type # check if all parameters have explicit type
if parameter.annotation is inspect.Parameter.empty: if parameter.annotation is inspect.Parameter.empty:
raise ValidationError(
f"parameter '{param_name}' in function '{func_name}' has no type annotation"
)
is_multipart = _is_multipart_param(param_type)
if prefix in ["get", "delete"]:
if route_params is not None and param_name in route_params:
path_params.append((param_name, param_type))
else:
query_params.append((param_name, param_type))
else:
if route_params is not None and param_name in route_params:
path_params.append((param_name, param_type))
elif is_multipart:
multipart_params.append((param_name, param_type))
else:
request_params.append((param_name, param_type))
# check if function has explicit return type
if signature.return_annotation is inspect.Signature.empty:
raise ValidationError( raise ValidationError(
f"parameter '{param_name}' in function '{func_name}' has no type annotation" f"function '{func_name}' has no return type annotation"
) )
is_multipart = _is_multipart_param(param_type) return_type = _get_annotation_type(signature.return_annotation, func_ref)
if prefix in ["get", "delete"]: # operations that produce events are labeled as Generator[YieldType, SendType, ReturnType]
if route_params is not None and param_name in route_params: # where YieldType is the event type, SendType is None, and ReturnType is the immediate response type to the request
path_params.append((param_name, param_type)) if typing.get_origin(return_type) is collections.abc.Generator:
else: event_type, send_type, response_type = typing.get_args(return_type)
query_params.append((param_name, param_type)) if send_type is not type(None):
raise ValidationError(
f"function '{func_name}' has a return type Generator[Y,S,R] and therefore looks like an event but has an explicit send type"
)
else: else:
if route_params is not None and param_name in route_params: event_type = None
path_params.append((param_name, param_type))
elif is_multipart: def process_type(t):
multipart_params.append((param_name, param_type)) if typing.get_origin(t) is collections.abc.AsyncIterator:
# NOTE(ashwin): this is SSE and there is no way to represent it. either we make it a List
# or the item type. I am choosing it to be the latter
args = typing.get_args(t)
return args[0]
elif typing.get_origin(t) is typing.Union:
types = [process_type(a) for a in typing.get_args(t)]
return typing._UnionGenericAlias(typing.Union, tuple(types))
else:
return t
response_type = process_type(return_type)
if prefix in ["delete", "remove"]:
http_method = HTTPMethod.DELETE
elif prefix == "post":
http_method = HTTPMethod.POST
elif prefix == "get":
http_method = HTTPMethod.GET
elif prefix == "set":
http_method = HTTPMethod.PUT
elif prefix == "update":
http_method = HTTPMethod.PATCH
else: else:
request_params.append((param_name, param_type)) raise ValidationError(f"unknown prefix {prefix}")
# check if function has explicit return type # Create an EndpointOperation for this specific webmethod
if signature.return_annotation is inspect.Signature.empty: operation = EndpointOperation(
raise ValidationError(
f"function '{func_name}' has no return type annotation"
)
return_type = _get_annotation_type(signature.return_annotation, func_ref)
# operations that produce events are labeled as Generator[YieldType, SendType, ReturnType]
# where YieldType is the event type, SendType is None, and ReturnType is the immediate response type to the request
if typing.get_origin(return_type) is collections.abc.Generator:
event_type, send_type, response_type = typing.get_args(return_type)
if send_type is not type(None):
raise ValidationError(
f"function '{func_name}' has a return type Generator[Y,S,R] and therefore looks like an event but has an explicit send type"
)
else:
event_type = None
def process_type(t):
if typing.get_origin(t) is collections.abc.AsyncIterator:
# NOTE(ashwin): this is SSE and there is no way to represent it. either we make it a List
# or the item type. I am choosing it to be the latter
args = typing.get_args(t)
return args[0]
elif typing.get_origin(t) is typing.Union:
types = [process_type(a) for a in typing.get_args(t)]
return typing._UnionGenericAlias(typing.Union, tuple(types))
else:
return t
response_type = process_type(return_type)
if prefix in ["delete", "remove"]:
http_method = HTTPMethod.DELETE
elif prefix == "post":
http_method = HTTPMethod.POST
elif prefix == "get":
http_method = HTTPMethod.GET
elif prefix == "set":
http_method = HTTPMethod.PUT
elif prefix == "update":
http_method = HTTPMethod.PATCH
else:
raise ValidationError(f"unknown prefix {prefix}")
result.append(
EndpointOperation(
defining_class=_get_defining_class(func_name, endpoint), defining_class=_get_defining_class(func_name, endpoint),
name=operation_name, name=operation_name,
func_name=func_name, func_name=func_name,
@ -354,7 +358,10 @@ def get_endpoint_operations(
request_examples=request_examples if use_examples else None, request_examples=request_examples if use_examples else None,
response_examples=response_examples if use_examples else None, response_examples=response_examples if use_examples else None,
) )
)
# Store the specific webmethod with this operation
operation.webmethod = webmethod
result.append(operation)
if not result: if not result:
raise ValidationError(f"no eligible endpoint operations in type {endpoint}") raise ValidationError(f"no eligible endpoint operations in type {endpoint}")

View file

@ -175,6 +175,43 @@
} }
} }
}, },
"/v1alpha/post-training/job/cancel": {
"post": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"PostTraining (Coming Soon)"
],
"summary": "Cancel a training job.",
"description": "Cancel a training job.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CancelTrainingJobRequest"
}
}
},
"required": true
}
}
},
"/v1/post-training/job/cancel": { "/v1/post-training/job/cancel": {
"post": { "post": {
"responses": { "responses": {
@ -1179,6 +1216,60 @@
} }
} }
}, },
"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations": {
"post": {
"responses": {
"200": {
"description": "EvaluateResponse object containing generations and scores.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/EvaluateResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Eval"
],
"summary": "Evaluate a list of rows on a benchmark.",
"description": "Evaluate a list of rows on a benchmark.",
"parameters": [
{
"name": "benchmark_id",
"in": "path",
"description": "The ID of the benchmark to run the evaluation on.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/EvaluateRowsRequest"
}
}
},
"required": true
}
}
},
"/v1/eval/benchmarks/{benchmark_id}/evaluations": { "/v1/eval/benchmarks/{benchmark_id}/evaluations": {
"post": { "post": {
"responses": { "responses": {
@ -1366,6 +1457,85 @@
] ]
} }
}, },
"/v1alpha/eval/benchmarks/{benchmark_id}": {
"get": {
"responses": {
"200": {
"description": "A Benchmark.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Benchmark"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Benchmarks"
],
"summary": "Get a benchmark by its ID.",
"description": "Get a benchmark by its ID.",
"parameters": [
{
"name": "benchmark_id",
"in": "path",
"description": "The ID of the benchmark to get.",
"required": true,
"schema": {
"type": "string"
}
}
]
},
"delete": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Benchmarks"
],
"summary": "Unregister a benchmark.",
"description": "Unregister a benchmark.",
"parameters": [
{
"name": "benchmark_id",
"in": "path",
"description": "The ID of the benchmark to unregister.",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/eval/benchmarks/{benchmark_id}": { "/v1/eval/benchmarks/{benchmark_id}": {
"get": { "get": {
"responses": { "responses": {
@ -2079,6 +2249,50 @@
] ]
} }
}, },
"/v1alpha/post-training/job/artifacts": {
"get": {
"responses": {
"200": {
"description": "A PostTrainingJobArtifactsResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"PostTraining (Coming Soon)"
],
"summary": "Get the artifacts of a training job.",
"description": "Get the artifacts of a training job.",
"parameters": [
{
"name": "job_uuid",
"in": "query",
"description": "The UUID of the job to get the artifacts of.",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/post-training/job/artifacts": { "/v1/post-training/job/artifacts": {
"get": { "get": {
"responses": { "responses": {
@ -2123,6 +2337,50 @@
] ]
} }
}, },
"/v1alpha/post-training/job/status": {
"get": {
"responses": {
"200": {
"description": "A PostTrainingJobStatusResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/PostTrainingJobStatusResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"PostTraining (Coming Soon)"
],
"summary": "Get the status of a training job.",
"description": "Get the status of a training job.",
"parameters": [
{
"name": "job_uuid",
"in": "query",
"description": "The UUID of the job to get the status of.",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/post-training/job/status": { "/v1/post-training/job/status": {
"get": { "get": {
"responses": { "responses": {
@ -2167,6 +2425,40 @@
] ]
} }
}, },
"/v1alpha/post-training/jobs": {
"get": {
"responses": {
"200": {
"description": "A ListPostTrainingJobsResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListPostTrainingJobsResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"PostTraining (Coming Soon)"
],
"summary": "Get all training jobs.",
"description": "Get all training jobs.",
"parameters": []
}
},
"/v1/post-training/jobs": { "/v1/post-training/jobs": {
"get": { "get": {
"responses": { "responses": {
@ -2538,6 +2830,103 @@
] ]
} }
}, },
"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
"get": {
"responses": {
"200": {
"description": "The status of the evaluation job.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Job"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Eval"
],
"summary": "Get the status of a job.",
"description": "Get the status of a job.",
"parameters": [
{
"name": "benchmark_id",
"in": "path",
"description": "The ID of the benchmark to run the evaluation on.",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "job_id",
"in": "path",
"description": "The ID of the job to get the status of.",
"required": true,
"schema": {
"type": "string"
}
}
]
},
"delete": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Eval"
],
"summary": "Cancel a job.",
"description": "Cancel a job.",
"parameters": [
{
"name": "benchmark_id",
"in": "path",
"description": "The ID of the benchmark to run the evaluation on.",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "job_id",
"in": "path",
"description": "The ID of the job to cancel.",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": { "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
"get": { "get": {
"responses": { "responses": {
@ -2635,6 +3024,59 @@
] ]
} }
}, },
"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": {
"get": {
"responses": {
"200": {
"description": "The result of the job.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/EvaluateResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Eval"
],
"summary": "Get the result of a job.",
"description": "Get the result of a job.",
"parameters": [
{
"name": "benchmark_id",
"in": "path",
"description": "The ID of the benchmark to run the evaluation on.",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "job_id",
"in": "path",
"description": "The ID of the job to get the result of.",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": { "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": {
"get": { "get": {
"responses": { "responses": {
@ -2750,6 +3192,75 @@
] ]
} }
}, },
"/v1alpha/eval/benchmarks": {
"get": {
"responses": {
"200": {
"description": "A ListBenchmarksResponse.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBenchmarksResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Benchmarks"
],
"summary": "List all benchmarks.",
"description": "List all benchmarks.",
"parameters": []
},
"post": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Benchmarks"
],
"summary": "Register a benchmark.",
"description": "Register a benchmark.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/RegisterBenchmarkRequest"
}
}
},
"required": true
}
}
},
"/v1/eval/benchmarks": { "/v1/eval/benchmarks": {
"get": { "get": {
"responses": { "responses": {
@ -4783,6 +5294,50 @@
} }
} }
}, },
"/v1alpha/post-training/preference-optimize": {
"post": {
"responses": {
"200": {
"description": "A PostTrainingJob.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/PostTrainingJob"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"PostTraining (Coming Soon)"
],
"summary": "Run preference optimization of a model.",
"description": "Run preference optimization of a model.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/PreferenceOptimizeRequest"
}
}
},
"required": true
}
}
},
"/v1/post-training/preference-optimize": { "/v1/post-training/preference-optimize": {
"post": { "post": {
"responses": { "responses": {
@ -5178,6 +5733,60 @@
} }
} }
}, },
"/v1alpha/eval/benchmarks/{benchmark_id}/jobs": {
"post": {
"responses": {
"200": {
"description": "The job that was created to run the evaluation.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Job"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Eval"
],
"summary": "Run an evaluation on a benchmark.",
"description": "Run an evaluation on a benchmark.",
"parameters": [
{
"name": "benchmark_id",
"in": "path",
"description": "The ID of the benchmark to run the evaluation on.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/RunEvalRequest"
}
}
},
"required": true
}
}
},
"/v1/eval/benchmarks/{benchmark_id}/jobs": { "/v1/eval/benchmarks/{benchmark_id}/jobs": {
"post": { "post": {
"responses": { "responses": {
@ -5499,6 +6108,50 @@
} }
} }
}, },
"/v1alpha/post-training/supervised-fine-tune": {
"post": {
"responses": {
"200": {
"description": "A PostTrainingJob.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/PostTrainingJob"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"PostTraining (Coming Soon)"
],
"summary": "Run supervised fine-tuning of a model.",
"description": "Run supervised fine-tuning of a model.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SupervisedFineTuneRequest"
}
}
},
"required": true
}
}
},
"/v1/post-training/supervised-fine-tune": { "/v1/post-training/supervised-fine-tune": {
"post": { "post": {
"responses": { "responses": {

View file

@ -109,6 +109,32 @@ paths:
schema: schema:
$ref: '#/components/schemas/BatchCompletionRequest' $ref: '#/components/schemas/BatchCompletionRequest'
required: true required: true
/v1alpha/post-training/job/cancel:
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- PostTraining (Coming Soon)
summary: Cancel a training job.
description: Cancel a training job.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CancelTrainingJobRequest'
required: true
/v1/post-training/job/cancel: /v1/post-training/job/cancel:
post: post:
responses: responses:
@ -832,6 +858,44 @@ paths:
schema: schema:
$ref: '#/components/schemas/EmbeddingsRequest' $ref: '#/components/schemas/EmbeddingsRequest'
required: true required: true
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
post:
responses:
'200':
description: >-
EvaluateResponse object containing generations and scores.
content:
application/json:
schema:
$ref: '#/components/schemas/EvaluateResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Eval
summary: Evaluate a list of rows on a benchmark.
description: Evaluate a list of rows on a benchmark.
parameters:
- name: benchmark_id
in: path
description: >-
The ID of the benchmark to run the evaluation on.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/EvaluateRowsRequest'
required: true
/v1/eval/benchmarks/{benchmark_id}/evaluations: /v1/eval/benchmarks/{benchmark_id}/evaluations:
post: post:
responses: responses:
@ -962,6 +1026,61 @@ paths:
required: true required: true
schema: schema:
type: string type: string
/v1alpha/eval/benchmarks/{benchmark_id}:
get:
responses:
'200':
description: A Benchmark.
content:
application/json:
schema:
$ref: '#/components/schemas/Benchmark'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Benchmarks
summary: Get a benchmark by its ID.
description: Get a benchmark by its ID.
parameters:
- name: benchmark_id
in: path
description: The ID of the benchmark to get.
required: true
schema:
type: string
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Benchmarks
summary: Unregister a benchmark.
description: Unregister a benchmark.
parameters:
- name: benchmark_id
in: path
description: The ID of the benchmark to unregister.
required: true
schema:
type: string
/v1/eval/benchmarks/{benchmark_id}: /v1/eval/benchmarks/{benchmark_id}:
get: get:
responses: responses:
@ -1458,6 +1577,37 @@ paths:
required: true required: true
schema: schema:
type: string type: string
/v1alpha/post-training/job/artifacts:
get:
responses:
'200':
description: A PostTrainingJobArtifactsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- PostTraining (Coming Soon)
summary: Get the artifacts of a training job.
description: Get the artifacts of a training job.
parameters:
- name: job_uuid
in: query
description: >-
The UUID of the job to get the artifacts of.
required: true
schema:
type: string
/v1/post-training/job/artifacts: /v1/post-training/job/artifacts:
get: get:
responses: responses:
@ -1489,6 +1639,37 @@ paths:
required: true required: true
schema: schema:
type: string type: string
/v1alpha/post-training/job/status:
get:
responses:
'200':
description: A PostTrainingJobStatusResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/PostTrainingJobStatusResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- PostTraining (Coming Soon)
summary: Get the status of a training job.
description: Get the status of a training job.
parameters:
- name: job_uuid
in: query
description: >-
The UUID of the job to get the status of.
required: true
schema:
type: string
/v1/post-training/job/status: /v1/post-training/job/status:
get: get:
responses: responses:
@ -1520,6 +1701,30 @@ paths:
required: true required: true
schema: schema:
type: string type: string
/v1alpha/post-training/jobs:
get:
responses:
'200':
description: A ListPostTrainingJobsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/ListPostTrainingJobsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- PostTraining (Coming Soon)
summary: Get all training jobs.
description: Get all training jobs.
parameters: []
/v1/post-training/jobs: /v1/post-training/jobs:
get: get:
responses: responses:
@ -1804,6 +2009,75 @@ paths:
required: false required: false
schema: schema:
type: integer type: integer
/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
get:
responses:
'200':
description: The status of the evaluation job.
content:
application/json:
schema:
$ref: '#/components/schemas/Job'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Eval
summary: Get the status of a job.
description: Get the status of a job.
parameters:
- name: benchmark_id
in: path
          description: >-
            The ID of the benchmark associated with the job.
required: true
schema:
type: string
- name: job_id
in: path
description: The ID of the job to get the status of.
required: true
schema:
type: string
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Eval
summary: Cancel a job.
description: Cancel a job.
parameters:
- name: benchmark_id
in: path
          description: >-
            The ID of the benchmark associated with the job to cancel.
required: true
schema:
type: string
- name: job_id
in: path
description: The ID of the job to cancel.
required: true
schema:
type: string
/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}: /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
get: get:
responses: responses:
@ -1873,6 +2147,43 @@ paths:
required: true required: true
schema: schema:
type: string type: string
/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result:
get:
responses:
'200':
description: The result of the job.
content:
application/json:
schema:
$ref: '#/components/schemas/EvaluateResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Eval
summary: Get the result of a job.
description: Get the result of a job.
parameters:
- name: benchmark_id
in: path
          description: >-
            The ID of the benchmark associated with the job.
required: true
schema:
type: string
- name: job_id
in: path
description: The ID of the job to get the result of.
required: true
schema:
type: string
/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result: /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result:
get: get:
responses: responses:
@ -1953,6 +2264,55 @@ paths:
required: false required: false
schema: schema:
type: integer type: integer
/v1alpha/eval/benchmarks:
get:
responses:
'200':
description: A ListBenchmarksResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBenchmarksResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Benchmarks
summary: List all benchmarks.
description: List all benchmarks.
parameters: []
post:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Benchmarks
summary: Register a benchmark.
description: Register a benchmark.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterBenchmarkRequest'
required: true
/v1/eval/benchmarks: /v1/eval/benchmarks:
get: get:
responses: responses:
@ -3454,6 +3814,36 @@ paths:
schema: schema:
$ref: '#/components/schemas/OpenaiSearchVectorStoreRequest' $ref: '#/components/schemas/OpenaiSearchVectorStoreRequest'
required: true required: true
/v1alpha/post-training/preference-optimize:
post:
responses:
'200':
description: A PostTrainingJob.
content:
application/json:
schema:
$ref: '#/components/schemas/PostTrainingJob'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- PostTraining (Coming Soon)
summary: Run preference optimization of a model.
description: Run preference optimization of a model.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/PreferenceOptimizeRequest'
required: true
/v1/post-training/preference-optimize: /v1/post-training/preference-optimize:
post: post:
responses: responses:
@ -3735,6 +4125,44 @@ paths:
schema: schema:
$ref: '#/components/schemas/ResumeAgentTurnRequest' $ref: '#/components/schemas/ResumeAgentTurnRequest'
required: true required: true
/v1alpha/eval/benchmarks/{benchmark_id}/jobs:
post:
responses:
'200':
description: >-
The job that was created to run the evaluation.
content:
application/json:
schema:
$ref: '#/components/schemas/Job'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Eval
summary: Run an evaluation on a benchmark.
description: Run an evaluation on a benchmark.
parameters:
- name: benchmark_id
in: path
description: >-
The ID of the benchmark to run the evaluation on.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RunEvalRequest'
required: true
/v1/eval/benchmarks/{benchmark_id}/jobs: /v1/eval/benchmarks/{benchmark_id}/jobs:
post: post:
responses: responses:
@ -3961,6 +4389,36 @@ paths:
schema: schema:
$ref: '#/components/schemas/SetDefaultVersionRequest' $ref: '#/components/schemas/SetDefaultVersionRequest'
required: true required: true
/v1alpha/post-training/supervised-fine-tune:
post:
responses:
'200':
description: A PostTrainingJob.
content:
application/json:
schema:
$ref: '#/components/schemas/PostTrainingJob'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- PostTraining (Coming Soon)
summary: Run supervised fine-tuning of a model.
description: Run supervised fine-tuning of a model.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SupervisedFineTuneRequest'
required: true
/v1/post-training/supervised-fine-tune: /v1/post-training/supervised-fine-tune:
post: post:
responses: responses:

View file

@ -27,6 +27,7 @@ from llama_stack.apis.inference import (
) )
from llama_stack.apis.safety import SafetyViolation from llama_stack.apis.safety import SafetyViolation
from llama_stack.apis.tools import ToolDef from llama_stack.apis.tools import ToolDef
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
from .openai_responses import ( from .openai_responses import (
@ -481,7 +482,7 @@ class Agents(Protocol):
- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details. - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
""" """
@webmethod(route="/agents", method="POST", descriptive_name="create_agent") @webmethod(route="/agents", method="POST", descriptive_name="create_agent", level=LLAMA_STACK_API_V1)
async def create_agent( async def create_agent(
self, self,
agent_config: AgentConfig, agent_config: AgentConfig,
@ -494,7 +495,10 @@ class Agents(Protocol):
... ...
@webmethod( @webmethod(
route="/agents/{agent_id}/session/{session_id}/turn", method="POST", descriptive_name="create_agent_turn" route="/agents/{agent_id}/session/{session_id}/turn",
method="POST",
descriptive_name="create_agent_turn",
level=LLAMA_STACK_API_V1,
) )
async def create_agent_turn( async def create_agent_turn(
self, self,
@ -524,6 +528,7 @@ class Agents(Protocol):
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume", route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
method="POST", method="POST",
descriptive_name="resume_agent_turn", descriptive_name="resume_agent_turn",
level=LLAMA_STACK_API_V1,
) )
async def resume_agent_turn( async def resume_agent_turn(
self, self,
@ -549,6 +554,7 @@ class Agents(Protocol):
@webmethod( @webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}", route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
method="GET", method="GET",
level=LLAMA_STACK_API_V1,
) )
async def get_agents_turn( async def get_agents_turn(
self, self,
@ -568,6 +574,7 @@ class Agents(Protocol):
@webmethod( @webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}", route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
method="GET", method="GET",
level=LLAMA_STACK_API_V1,
) )
async def get_agents_step( async def get_agents_step(
self, self,
@ -586,7 +593,12 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/agents/{agent_id}/session", method="POST", descriptive_name="create_agent_session") @webmethod(
route="/agents/{agent_id}/session",
method="POST",
descriptive_name="create_agent_session",
level=LLAMA_STACK_API_V1,
)
async def create_agent_session( async def create_agent_session(
self, self,
agent_id: str, agent_id: str,
@ -600,7 +612,7 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET") @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_agents_session( async def get_agents_session(
self, self,
session_id: str, session_id: str,
@ -616,7 +628,7 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE") @webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_agents_session( async def delete_agents_session(
self, self,
session_id: str, session_id: str,
@ -629,7 +641,7 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/agents/{agent_id}", method="DELETE") @webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_agent( async def delete_agent(
self, self,
agent_id: str, agent_id: str,
@ -640,7 +652,7 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/agents", method="GET") @webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1)
async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse: async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
"""List all agents. """List all agents.
@ -650,7 +662,7 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/agents/{agent_id}", method="GET") @webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_agent(self, agent_id: str) -> Agent: async def get_agent(self, agent_id: str) -> Agent:
"""Describe an agent by its ID. """Describe an agent by its ID.
@ -659,7 +671,7 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/agents/{agent_id}/sessions", method="GET") @webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1)
async def list_agent_sessions( async def list_agent_sessions(
self, self,
agent_id: str, agent_id: str,
@ -682,7 +694,7 @@ class Agents(Protocol):
# #
# Both of these APIs are inherently stateful. # Both of these APIs are inherently stateful.
@webmethod(route="/openai/v1/responses/{response_id}", method="GET") @webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_openai_response( async def get_openai_response(
self, self,
response_id: str, response_id: str,
@ -694,7 +706,7 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/responses", method="POST") @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1)
async def create_openai_response( async def create_openai_response(
self, self,
input: str | list[OpenAIResponseInput], input: str | list[OpenAIResponseInput],
@ -719,7 +731,7 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/responses", method="GET") @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1)
async def list_openai_responses( async def list_openai_responses(
self, self,
after: str | None = None, after: str | None = None,
@ -737,7 +749,7 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET") @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
async def list_openai_response_input_items( async def list_openai_response_input_items(
self, self,
response_id: str, response_id: str,
@ -759,7 +771,7 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE") @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject: async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
"""Delete an OpenAI response by its ID. """Delete an OpenAI response by its ID.

View file

@ -17,6 +17,7 @@ from llama_stack.apis.inference import (
ToolDefinition, ToolDefinition,
ToolPromptFormat, ToolPromptFormat,
) )
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import webmethod from llama_stack.schema_utils import webmethod
@ -30,7 +31,7 @@ class BatchInference(Protocol):
including (post-training, evals, etc). including (post-training, evals, etc).
""" """
@webmethod(route="/batch-inference/completion", method="POST") @webmethod(route="/batch-inference/completion", method="POST", level=LLAMA_STACK_API_V1)
async def completion( async def completion(
self, self,
model: str, model: str,
@ -50,7 +51,7 @@ class BatchInference(Protocol):
""" """
... ...
@webmethod(route="/batch-inference/chat-completion", method="POST") @webmethod(route="/batch-inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
async def chat_completion( async def chat_completion(
self, self,
model: str, model: str,

View file

@ -8,6 +8,7 @@ from typing import Literal, Protocol, runtime_checkable
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
try: try:
@ -42,7 +43,7 @@ class Batches(Protocol):
Note: This API is currently under active development and may undergo changes. Note: This API is currently under active development and may undergo changes.
""" """
@webmethod(route="/openai/v1/batches", method="POST") @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1)
async def create_batch( async def create_batch(
self, self,
input_file_id: str, input_file_id: str,
@ -62,7 +63,7 @@ class Batches(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/batches/{batch_id}", method="GET") @webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
async def retrieve_batch(self, batch_id: str) -> BatchObject: async def retrieve_batch(self, batch_id: str) -> BatchObject:
"""Retrieve information about a specific batch. """Retrieve information about a specific batch.
@ -71,7 +72,7 @@ class Batches(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST") @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
async def cancel_batch(self, batch_id: str) -> BatchObject: async def cancel_batch(self, batch_id: str) -> BatchObject:
"""Cancel a batch that is in progress. """Cancel a batch that is in progress.
@ -80,7 +81,7 @@ class Batches(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/batches", method="GET") @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1)
async def list_batches( async def list_batches(
self, self,
after: str | None = None, after: str | None = None,

View file

@ -8,6 +8,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from llama_stack.apis.resource import Resource, ResourceType from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
@ -53,7 +54,8 @@ class ListBenchmarksResponse(BaseModel):
@runtime_checkable @runtime_checkable
class Benchmarks(Protocol): class Benchmarks(Protocol):
@webmethod(route="/eval/benchmarks", method="GET") @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def list_benchmarks(self) -> ListBenchmarksResponse: async def list_benchmarks(self) -> ListBenchmarksResponse:
"""List all benchmarks. """List all benchmarks.
@ -61,7 +63,8 @@ class Benchmarks(Protocol):
""" """
... ...
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET") @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_benchmark( async def get_benchmark(
self, self,
benchmark_id: str, benchmark_id: str,
@ -73,7 +76,8 @@ class Benchmarks(Protocol):
""" """
... ...
@webmethod(route="/eval/benchmarks", method="POST") @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def register_benchmark( async def register_benchmark(
self, self,
benchmark_id: str, benchmark_id: str,
@ -94,7 +98,8 @@ class Benchmarks(Protocol):
""" """
... ...
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE") @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
async def unregister_benchmark(self, benchmark_id: str) -> None: async def unregister_benchmark(self, benchmark_id: str) -> None:
"""Unregister a benchmark. """Unregister a benchmark.

View file

@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable
from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.datasets import Dataset from llama_stack.apis.datasets import Dataset
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import webmethod from llama_stack.schema_utils import webmethod
@ -20,7 +21,7 @@ class DatasetIO(Protocol):
# keeping for aligning with inference/safety, but this is not used # keeping for aligning with inference/safety, but this is not used
dataset_store: DatasetStore dataset_store: DatasetStore
@webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET") @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def iterrows( async def iterrows(
self, self,
dataset_id: str, dataset_id: str,
@ -44,7 +45,7 @@ class DatasetIO(Protocol):
""" """
... ...
@webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST") @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1)
async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None: async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
"""Append rows to a dataset. """Append rows to a dataset.

View file

@ -10,6 +10,7 @@ from typing import Annotated, Any, Literal, Protocol
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from llama_stack.apis.resource import Resource, ResourceType from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -145,7 +146,7 @@ class ListDatasetsResponse(BaseModel):
class Datasets(Protocol): class Datasets(Protocol):
@webmethod(route="/datasets", method="POST") @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1)
async def register_dataset( async def register_dataset(
self, self,
purpose: DatasetPurpose, purpose: DatasetPurpose,
@ -214,7 +215,7 @@ class Datasets(Protocol):
""" """
... ...
@webmethod(route="/datasets/{dataset_id:path}", method="GET") @webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_dataset( async def get_dataset(
self, self,
dataset_id: str, dataset_id: str,
@ -226,7 +227,7 @@ class Datasets(Protocol):
""" """
... ...
@webmethod(route="/datasets", method="GET") @webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1)
async def list_datasets(self) -> ListDatasetsResponse: async def list_datasets(self) -> ListDatasetsResponse:
"""List all datasets. """List all datasets.
@ -234,7 +235,7 @@ class Datasets(Protocol):
""" """
... ...
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE") @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_dataset( async def unregister_dataset(
self, self,
dataset_id: str, dataset_id: str,

View file

@ -13,6 +13,7 @@ from llama_stack.apis.common.job_types import Job
from llama_stack.apis.inference import SamplingParams, SystemMessage from llama_stack.apis.inference import SamplingParams, SystemMessage
from llama_stack.apis.scoring import ScoringResult from llama_stack.apis.scoring import ScoringResult
from llama_stack.apis.scoring_functions import ScoringFnParams from llama_stack.apis.scoring_functions import ScoringFnParams
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -83,7 +84,8 @@ class EvaluateResponse(BaseModel):
class Eval(Protocol): class Eval(Protocol):
"""Llama Stack Evaluation API for running evaluations on model and agent candidates.""" """Llama Stack Evaluation API for running evaluations on model and agent candidates."""
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST") @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def run_eval( async def run_eval(
self, self,
benchmark_id: str, benchmark_id: str,
@ -97,7 +99,10 @@ class Eval(Protocol):
""" """
... ...
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST") @webmethod(
route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def evaluate_rows( async def evaluate_rows(
self, self,
benchmark_id: str, benchmark_id: str,
@ -115,7 +120,10 @@ class Eval(Protocol):
""" """
... ...
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET") @webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def job_status(self, benchmark_id: str, job_id: str) -> Job: async def job_status(self, benchmark_id: str, job_id: str) -> Job:
"""Get the status of a job. """Get the status of a job.
@ -125,7 +133,13 @@ class Eval(Protocol):
""" """
... ...
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE") @webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
method="DELETE",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
async def job_cancel(self, benchmark_id: str, job_id: str) -> None: async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
"""Cancel a job. """Cancel a job.
@ -134,7 +148,15 @@ class Eval(Protocol):
""" """
... ...
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET") @webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA
)
async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
"""Get the result of a job. """Get the result of a job.

View file

@ -11,6 +11,7 @@ from fastapi import File, Form, Response, UploadFile
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from llama_stack.apis.common.responses import Order from llama_stack.apis.common.responses import Order
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
@ -104,7 +105,7 @@ class OpenAIFileDeleteResponse(BaseModel):
@trace_protocol @trace_protocol
class Files(Protocol): class Files(Protocol):
# OpenAI Files API Endpoints # OpenAI Files API Endpoints
@webmethod(route="/openai/v1/files", method="POST") @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1)
async def openai_upload_file( async def openai_upload_file(
self, self,
file: Annotated[UploadFile, File()], file: Annotated[UploadFile, File()],
@ -127,7 +128,7 @@ class Files(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/files", method="GET") @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_files( async def openai_list_files(
self, self,
after: str | None = None, after: str | None = None,
@ -146,7 +147,7 @@ class Files(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/files/{file_id}", method="GET") @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_file( async def openai_retrieve_file(
self, self,
file_id: str, file_id: str,
@ -159,7 +160,7 @@ class Files(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/files/{file_id}", method="DELETE") @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def openai_delete_file( async def openai_delete_file(
self, self,
file_id: str, file_id: str,
@ -172,7 +173,7 @@ class Files(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/files/{file_id}/content", method="GET") @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_file_content( async def openai_retrieve_file_content(
self, self,
file_id: str, file_id: str,

View file

@ -21,6 +21,7 @@ from llama_stack.apis.common.content_types import ContentDelta, InterleavedConte
from llama_stack.apis.common.responses import Order from llama_stack.apis.common.responses import Order
from llama_stack.apis.models import Model from llama_stack.apis.models import Model
from llama_stack.apis.telemetry import MetricResponseMixin from llama_stack.apis.telemetry import MetricResponseMixin
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.models.llama.datatypes import ( from llama_stack.models.llama.datatypes import (
BuiltinTool, BuiltinTool,
StopReason, StopReason,
@ -1026,7 +1027,7 @@ class InferenceProvider(Protocol):
model_store: ModelStore | None = None model_store: ModelStore | None = None
@webmethod(route="/inference/completion", method="POST") @webmethod(route="/inference/completion", method="POST", level=LLAMA_STACK_API_V1)
async def completion( async def completion(
self, self,
model_id: str, model_id: str,
@ -1049,7 +1050,7 @@ class InferenceProvider(Protocol):
""" """
... ...
@webmethod(route="/inference/batch-completion", method="POST", experimental=True) @webmethod(route="/inference/batch-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
async def batch_completion( async def batch_completion(
self, self,
model_id: str, model_id: str,
@ -1070,7 +1071,7 @@ class InferenceProvider(Protocol):
raise NotImplementedError("Batch completion is not implemented") raise NotImplementedError("Batch completion is not implemented")
return # this is so mypy's safe-super rule will consider the method concrete return # this is so mypy's safe-super rule will consider the method concrete
@webmethod(route="/inference/chat-completion", method="POST") @webmethod(route="/inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
async def chat_completion( async def chat_completion(
self, self,
model_id: str, model_id: str,
@ -1110,7 +1111,7 @@ class InferenceProvider(Protocol):
""" """
... ...
@webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True) @webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
async def batch_chat_completion( async def batch_chat_completion(
self, self,
model_id: str, model_id: str,
@ -1135,7 +1136,7 @@ class InferenceProvider(Protocol):
raise NotImplementedError("Batch chat completion is not implemented") raise NotImplementedError("Batch chat completion is not implemented")
return # this is so mypy's safe-super rule will consider the method concrete return # this is so mypy's safe-super rule will consider the method concrete
@webmethod(route="/inference/embeddings", method="POST") @webmethod(route="/inference/embeddings", method="POST", level=LLAMA_STACK_API_V1)
async def embeddings( async def embeddings(
self, self,
model_id: str, model_id: str,
@ -1155,7 +1156,7 @@ class InferenceProvider(Protocol):
""" """
... ...
@webmethod(route="/inference/rerank", method="POST", experimental=True) @webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
async def rerank( async def rerank(
self, self,
model: str, model: str,
@ -1174,7 +1175,7 @@ class InferenceProvider(Protocol):
raise NotImplementedError("Reranking is not implemented") raise NotImplementedError("Reranking is not implemented")
return # this is so mypy's safe-super rule will consider the method concrete return # this is so mypy's safe-super rule will consider the method concrete
@webmethod(route="/openai/v1/completions", method="POST") @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1)
async def openai_completion( async def openai_completion(
self, self,
# Standard OpenAI completion parameters # Standard OpenAI completion parameters
@ -1225,7 +1226,7 @@ class InferenceProvider(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/chat/completions", method="POST") @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
async def openai_chat_completion( async def openai_chat_completion(
self, self,
model: str, model: str,
@ -1281,7 +1282,7 @@ class InferenceProvider(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/embeddings", method="POST") @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1)
async def openai_embeddings( async def openai_embeddings(
self, self,
model: str, model: str,
@ -1310,7 +1311,7 @@ class Inference(InferenceProvider):
- Embedding models: these models generate embeddings to be used for semantic search. - Embedding models: these models generate embeddings to be used for semantic search.
""" """
@webmethod(route="/openai/v1/chat/completions", method="GET") @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
async def list_chat_completions( async def list_chat_completions(
self, self,
after: str | None = None, after: str | None = None,
@ -1328,7 +1329,7 @@ class Inference(InferenceProvider):
""" """
raise NotImplementedError("List chat completions is not implemented") raise NotImplementedError("List chat completions is not implemented")
@webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET") @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages: async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
"""Describe a chat completion by its ID. """Describe a chat completion by its ID.

View file

@ -8,6 +8,7 @@ from typing import Protocol, runtime_checkable
from pydantic import BaseModel from pydantic import BaseModel
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.datatypes import HealthStatus from llama_stack.providers.datatypes import HealthStatus
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
@ -57,7 +58,7 @@ class ListRoutesResponse(BaseModel):
@runtime_checkable @runtime_checkable
class Inspect(Protocol): class Inspect(Protocol):
@webmethod(route="/inspect/routes", method="GET") @webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
async def list_routes(self) -> ListRoutesResponse: async def list_routes(self) -> ListRoutesResponse:
"""List all available API routes with their methods and implementing providers. """List all available API routes with their methods and implementing providers.
@ -65,7 +66,7 @@ class Inspect(Protocol):
""" """
... ...
@webmethod(route="/health", method="GET") @webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1)
async def health(self) -> HealthInfo: async def health(self) -> HealthInfo:
"""Get the current health status of the service. """Get the current health status of the service.
@ -73,7 +74,7 @@ class Inspect(Protocol):
""" """
... ...
@webmethod(route="/version", method="GET") @webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1)
async def version(self) -> VersionInfo: async def version(self) -> VersionInfo:
"""Get the version of the service. """Get the version of the service.

View file

@ -10,6 +10,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, ConfigDict, Field, field_validator from pydantic import BaseModel, ConfigDict, Field, field_validator
from llama_stack.apis.resource import Resource, ResourceType from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
@ -102,7 +103,7 @@ class OpenAIListModelsResponse(BaseModel):
@runtime_checkable @runtime_checkable
@trace_protocol @trace_protocol
class Models(Protocol): class Models(Protocol):
@webmethod(route="/models", method="GET") @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
async def list_models(self) -> ListModelsResponse: async def list_models(self) -> ListModelsResponse:
"""List all models. """List all models.
@ -110,7 +111,7 @@ class Models(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/models", method="GET") @webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_models(self) -> OpenAIListModelsResponse: async def openai_list_models(self) -> OpenAIListModelsResponse:
"""List models using the OpenAI API. """List models using the OpenAI API.
@ -118,7 +119,7 @@ class Models(Protocol):
""" """
... ...
@webmethod(route="/models/{model_id:path}", method="GET") @webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_model( async def get_model(
self, self,
model_id: str, model_id: str,
@ -130,7 +131,7 @@ class Models(Protocol):
""" """
... ...
@webmethod(route="/models", method="POST") @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1)
async def register_model( async def register_model(
self, self,
model_id: str, model_id: str,
@ -150,7 +151,7 @@ class Models(Protocol):
""" """
... ...
@webmethod(route="/models/{model_id:path}", method="DELETE") @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_model( async def unregister_model(
self, self,
model_id: str, model_id: str,

View file

@ -13,6 +13,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.common.content_types import URL from llama_stack.apis.common.content_types import URL
from llama_stack.apis.common.job_types import JobStatus from llama_stack.apis.common.job_types import JobStatus
from llama_stack.apis.common.training_types import Checkpoint from llama_stack.apis.common.training_types import Checkpoint
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -283,7 +284,8 @@ class PostTrainingJobArtifactsResponse(BaseModel):
class PostTraining(Protocol): class PostTraining(Protocol):
@webmethod(route="/post-training/supervised-fine-tune", method="POST") @webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def supervised_fine_tune( async def supervised_fine_tune(
self, self,
job_uuid: str, job_uuid: str,
@ -310,7 +312,8 @@ class PostTraining(Protocol):
""" """
... ...
@webmethod(route="/post-training/preference-optimize", method="POST") @webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def preference_optimize( async def preference_optimize(
self, self,
job_uuid: str, job_uuid: str,
@ -332,7 +335,8 @@ class PostTraining(Protocol):
""" """
... ...
@webmethod(route="/post-training/jobs", method="GET") @webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_jobs(self) -> ListPostTrainingJobsResponse: async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
"""Get all training jobs. """Get all training jobs.
@ -340,7 +344,8 @@ class PostTraining(Protocol):
""" """
... ...
@webmethod(route="/post-training/job/status", method="GET") @webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse: async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse:
"""Get the status of a training job. """Get the status of a training job.
@ -349,7 +354,8 @@ class PostTraining(Protocol):
""" """
... ...
@webmethod(route="/post-training/job/cancel", method="POST") @webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def cancel_training_job(self, job_uuid: str) -> None: async def cancel_training_job(self, job_uuid: str) -> None:
"""Cancel a training job. """Cancel a training job.
@ -357,7 +363,8 @@ class PostTraining(Protocol):
""" """
... ...
@webmethod(route="/post-training/job/artifacts", method="GET") @webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse: async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
"""Get the artifacts of a training job. """Get the artifacts of a training job.

View file

@ -10,6 +10,7 @@ from typing import Protocol, runtime_checkable
from pydantic import BaseModel, Field, field_validator, model_validator from pydantic import BaseModel, Field, field_validator, model_validator
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
@ -95,7 +96,7 @@ class ListPromptsResponse(BaseModel):
class Prompts(Protocol): class Prompts(Protocol):
"""Protocol for prompt management operations.""" """Protocol for prompt management operations."""
@webmethod(route="/prompts", method="GET") @webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1)
async def list_prompts(self) -> ListPromptsResponse: async def list_prompts(self) -> ListPromptsResponse:
"""List all prompts. """List all prompts.
@ -103,7 +104,7 @@ class Prompts(Protocol):
""" """
... ...
@webmethod(route="/prompts/{prompt_id}/versions", method="GET") @webmethod(route="/prompts/{prompt_id}/versions", method="GET", level=LLAMA_STACK_API_V1)
async def list_prompt_versions( async def list_prompt_versions(
self, self,
prompt_id: str, prompt_id: str,
@ -115,7 +116,7 @@ class Prompts(Protocol):
""" """
... ...
@webmethod(route="/prompts/{prompt_id}", method="GET") @webmethod(route="/prompts/{prompt_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_prompt( async def get_prompt(
self, self,
prompt_id: str, prompt_id: str,
@ -129,7 +130,7 @@ class Prompts(Protocol):
""" """
... ...
@webmethod(route="/prompts", method="POST") @webmethod(route="/prompts", method="POST", level=LLAMA_STACK_API_V1)
async def create_prompt( async def create_prompt(
self, self,
prompt: str, prompt: str,
@ -143,7 +144,7 @@ class Prompts(Protocol):
""" """
... ...
@webmethod(route="/prompts/{prompt_id}", method="PUT") @webmethod(route="/prompts/{prompt_id}", method="PUT", level=LLAMA_STACK_API_V1)
async def update_prompt( async def update_prompt(
self, self,
prompt_id: str, prompt_id: str,
@ -163,7 +164,7 @@ class Prompts(Protocol):
""" """
... ...
@webmethod(route="/prompts/{prompt_id}", method="DELETE") @webmethod(route="/prompts/{prompt_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_prompt( async def delete_prompt(
self, self,
prompt_id: str, prompt_id: str,
@ -174,7 +175,7 @@ class Prompts(Protocol):
""" """
... ...
@webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT") @webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT", level=LLAMA_STACK_API_V1)
async def set_default_version( async def set_default_version(
self, self,
prompt_id: str, prompt_id: str,

View file

@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable
from pydantic import BaseModel from pydantic import BaseModel
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.datatypes import HealthResponse from llama_stack.providers.datatypes import HealthResponse
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
@ -45,7 +46,7 @@ class Providers(Protocol):
Providers API for inspecting, listing, and modifying providers and their configurations. Providers API for inspecting, listing, and modifying providers and their configurations.
""" """
@webmethod(route="/providers", method="GET") @webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1)
async def list_providers(self) -> ListProvidersResponse: async def list_providers(self) -> ListProvidersResponse:
"""List all available providers. """List all available providers.
@ -53,7 +54,7 @@ class Providers(Protocol):
""" """
... ...
@webmethod(route="/providers/{provider_id}", method="GET") @webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1)
async def inspect_provider(self, provider_id: str) -> ProviderInfo: async def inspect_provider(self, provider_id: str) -> ProviderInfo:
"""Get detailed information about a specific provider. """Get detailed information about a specific provider.

View file

@ -11,6 +11,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.inference import Message from llama_stack.apis.inference import Message
from llama_stack.apis.shields import Shield from llama_stack.apis.shields import Shield
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
@ -97,7 +98,7 @@ class ShieldStore(Protocol):
class Safety(Protocol): class Safety(Protocol):
shield_store: ShieldStore shield_store: ShieldStore
@webmethod(route="/safety/run-shield", method="POST") @webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1)
async def run_shield( async def run_shield(
self, self,
shield_id: str, shield_id: str,
@ -113,7 +114,7 @@ class Safety(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/moderations", method="POST") @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1)
async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject: async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
"""Classifies if text and/or image inputs are potentially harmful. """Classifies if text and/or image inputs are potentially harmful.
:param input: Input (or inputs) to classify. :param input: Input (or inputs) to classify.

View file

@ -9,6 +9,7 @@ from typing import Any, Protocol, runtime_checkable
from pydantic import BaseModel from pydantic import BaseModel
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
# mapping of metric to value # mapping of metric to value
@ -61,7 +62,7 @@ class ScoringFunctionStore(Protocol):
class Scoring(Protocol): class Scoring(Protocol):
scoring_function_store: ScoringFunctionStore scoring_function_store: ScoringFunctionStore
@webmethod(route="/scoring/score-batch", method="POST") @webmethod(route="/scoring/score-batch", method="POST", level=LLAMA_STACK_API_V1)
async def score_batch( async def score_batch(
self, self,
dataset_id: str, dataset_id: str,
@ -77,7 +78,7 @@ class Scoring(Protocol):
""" """
... ...
@webmethod(route="/scoring/score", method="POST") @webmethod(route="/scoring/score", method="POST", level=LLAMA_STACK_API_V1)
async def score( async def score(
self, self,
input_rows: list[dict[str, Any]], input_rows: list[dict[str, Any]],

View file

@ -18,6 +18,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.common.type_system import ParamType from llama_stack.apis.common.type_system import ParamType
from llama_stack.apis.resource import Resource, ResourceType from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -160,7 +161,7 @@ class ListScoringFunctionsResponse(BaseModel):
@runtime_checkable @runtime_checkable
class ScoringFunctions(Protocol): class ScoringFunctions(Protocol):
@webmethod(route="/scoring-functions", method="GET") @webmethod(route="/scoring-functions", method="GET", level=LLAMA_STACK_API_V1)
async def list_scoring_functions(self) -> ListScoringFunctionsResponse: async def list_scoring_functions(self) -> ListScoringFunctionsResponse:
"""List all scoring functions. """List all scoring functions.
@ -168,7 +169,7 @@ class ScoringFunctions(Protocol):
""" """
... ...
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET") @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn: async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn:
"""Get a scoring function by its ID. """Get a scoring function by its ID.
@ -177,7 +178,7 @@ class ScoringFunctions(Protocol):
""" """
... ...
@webmethod(route="/scoring-functions", method="POST") @webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1)
async def register_scoring_function( async def register_scoring_function(
self, self,
scoring_fn_id: str, scoring_fn_id: str,
@ -198,7 +199,7 @@ class ScoringFunctions(Protocol):
""" """
... ...
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE") @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_scoring_function(self, scoring_fn_id: str) -> None: async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
"""Unregister a scoring function. """Unregister a scoring function.

View file

@ -9,6 +9,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel from pydantic import BaseModel
from llama_stack.apis.resource import Resource, ResourceType from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
@ -49,7 +50,7 @@ class ListShieldsResponse(BaseModel):
@runtime_checkable @runtime_checkable
@trace_protocol @trace_protocol
class Shields(Protocol): class Shields(Protocol):
@webmethod(route="/shields", method="GET") @webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1)
async def list_shields(self) -> ListShieldsResponse: async def list_shields(self) -> ListShieldsResponse:
"""List all shields. """List all shields.
@ -57,7 +58,7 @@ class Shields(Protocol):
""" """
... ...
@webmethod(route="/shields/{identifier:path}", method="GET") @webmethod(route="/shields/{identifier:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_shield(self, identifier: str) -> Shield: async def get_shield(self, identifier: str) -> Shield:
"""Get a shield by its identifier. """Get a shield by its identifier.
@ -66,7 +67,7 @@ class Shields(Protocol):
""" """
... ...
@webmethod(route="/shields", method="POST") @webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1)
async def register_shield( async def register_shield(
self, self,
shield_id: str, shield_id: str,
@ -84,7 +85,7 @@ class Shields(Protocol):
""" """
... ...
@webmethod(route="/shields/{identifier:path}", method="DELETE") @webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_shield(self, identifier: str) -> None: async def unregister_shield(self, identifier: str) -> None:
"""Unregister a shield. """Unregister a shield.

View file

@ -10,6 +10,7 @@ from typing import Any, Protocol
from pydantic import BaseModel from pydantic import BaseModel
from llama_stack.apis.inference import Message from llama_stack.apis.inference import Message
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
@ -59,7 +60,7 @@ class SyntheticDataGenerationResponse(BaseModel):
class SyntheticDataGeneration(Protocol): class SyntheticDataGeneration(Protocol):
@webmethod(route="/synthetic-data-generation/generate") @webmethod(route="/synthetic-data-generation/generate", level=LLAMA_STACK_API_V1)
def synthetic_data_generate( def synthetic_data_generate(
self, self,
dialogs: list[Message], dialogs: list[Message],

View file

@ -16,6 +16,7 @@ from typing import (
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.models.llama.datatypes import Primitive from llama_stack.models.llama.datatypes import Primitive
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -412,7 +413,7 @@ class QueryMetricsResponse(BaseModel):
@runtime_checkable @runtime_checkable
class Telemetry(Protocol): class Telemetry(Protocol):
@webmethod(route="/telemetry/events", method="POST") @webmethod(route="/telemetry/events", method="POST", level=LLAMA_STACK_API_V1)
async def log_event( async def log_event(
self, self,
event: Event, event: Event,
@ -425,7 +426,7 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE) @webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1)
async def query_traces( async def query_traces(
self, self,
attribute_filters: list[QueryCondition] | None = None, attribute_filters: list[QueryCondition] | None = None,
@ -443,7 +444,9 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(route="/telemetry/traces/{trace_id:path}", method="GET", required_scope=REQUIRED_SCOPE) @webmethod(
route="/telemetry/traces/{trace_id:path}", method="GET", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1
)
async def get_trace(self, trace_id: str) -> Trace: async def get_trace(self, trace_id: str) -> Trace:
"""Get a trace by its ID. """Get a trace by its ID.
@ -453,7 +456,10 @@ class Telemetry(Protocol):
... ...
@webmethod( @webmethod(
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}", method="GET", required_scope=REQUIRED_SCOPE route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
method="GET",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1,
) )
async def get_span(self, trace_id: str, span_id: str) -> Span: async def get_span(self, trace_id: str, span_id: str) -> Span:
"""Get a span by its ID. """Get a span by its ID.
@ -464,7 +470,12 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(route="/telemetry/spans/{span_id:path}/tree", method="POST", required_scope=REQUIRED_SCOPE) @webmethod(
route="/telemetry/spans/{span_id:path}/tree",
method="POST",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1,
)
async def get_span_tree( async def get_span_tree(
self, self,
span_id: str, span_id: str,
@ -480,7 +491,7 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE) @webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1)
async def query_spans( async def query_spans(
self, self,
attribute_filters: list[QueryCondition], attribute_filters: list[QueryCondition],
@ -496,7 +507,7 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(route="/telemetry/spans/export", method="POST") @webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1)
async def save_spans_to_dataset( async def save_spans_to_dataset(
self, self,
attribute_filters: list[QueryCondition], attribute_filters: list[QueryCondition],
@ -513,7 +524,9 @@ class Telemetry(Protocol):
""" """
... ...
@webmethod(route="/telemetry/metrics/{metric_name}", method="POST", required_scope=REQUIRED_SCOPE) @webmethod(
route="/telemetry/metrics/{metric_name}", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1
)
async def query_metrics( async def query_metrics(
self, self,
metric_name: str, metric_name: str,

View file

@ -11,6 +11,7 @@ from pydantic import BaseModel, Field, field_validator
from typing_extensions import runtime_checkable from typing_extensions import runtime_checkable
from llama_stack.apis.common.content_types import URL, InterleavedContent from llama_stack.apis.common.content_types import URL, InterleavedContent
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -185,7 +186,7 @@ class RAGQueryConfig(BaseModel):
@runtime_checkable @runtime_checkable
@trace_protocol @trace_protocol
class RAGToolRuntime(Protocol): class RAGToolRuntime(Protocol):
@webmethod(route="/tool-runtime/rag-tool/insert", method="POST") @webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1)
async def insert( async def insert(
self, self,
documents: list[RAGDocument], documents: list[RAGDocument],
@ -200,7 +201,7 @@ class RAGToolRuntime(Protocol):
""" """
... ...
@webmethod(route="/tool-runtime/rag-tool/query", method="POST") @webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1)
async def query( async def query(
self, self,
content: InterleavedContent, content: InterleavedContent,

View file

@ -12,6 +12,7 @@ from typing_extensions import runtime_checkable
from llama_stack.apis.common.content_types import URL, InterleavedContent from llama_stack.apis.common.content_types import URL, InterleavedContent
from llama_stack.apis.resource import Resource, ResourceType from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
@ -151,7 +152,7 @@ class ListToolDefsResponse(BaseModel):
@runtime_checkable @runtime_checkable
@trace_protocol @trace_protocol
class ToolGroups(Protocol): class ToolGroups(Protocol):
@webmethod(route="/toolgroups", method="POST") @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1)
async def register_tool_group( async def register_tool_group(
self, self,
toolgroup_id: str, toolgroup_id: str,
@ -168,7 +169,7 @@ class ToolGroups(Protocol):
""" """
... ...
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET") @webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_tool_group( async def get_tool_group(
self, self,
toolgroup_id: str, toolgroup_id: str,
@ -180,7 +181,7 @@ class ToolGroups(Protocol):
""" """
... ...
@webmethod(route="/toolgroups", method="GET") @webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1)
async def list_tool_groups(self) -> ListToolGroupsResponse: async def list_tool_groups(self) -> ListToolGroupsResponse:
"""List tool groups with optional provider. """List tool groups with optional provider.
@ -188,7 +189,7 @@ class ToolGroups(Protocol):
""" """
... ...
@webmethod(route="/tools", method="GET") @webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1)
async def list_tools(self, toolgroup_id: str | None = None) -> ListToolsResponse: async def list_tools(self, toolgroup_id: str | None = None) -> ListToolsResponse:
"""List tools with optional tool group. """List tools with optional tool group.
@ -197,7 +198,7 @@ class ToolGroups(Protocol):
""" """
... ...
@webmethod(route="/tools/{tool_name:path}", method="GET") @webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_tool( async def get_tool(
self, self,
tool_name: str, tool_name: str,
@ -209,7 +210,7 @@ class ToolGroups(Protocol):
""" """
... ...
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE") @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_toolgroup( async def unregister_toolgroup(
self, self,
toolgroup_id: str, toolgroup_id: str,
@ -238,7 +239,7 @@ class ToolRuntime(Protocol):
rag_tool: RAGToolRuntime | None = None rag_tool: RAGToolRuntime | None = None
# TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed. # TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed.
@webmethod(route="/tool-runtime/list-tools", method="GET") @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1)
async def list_runtime_tools( async def list_runtime_tools(
self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
) -> ListToolDefsResponse: ) -> ListToolDefsResponse:
@ -250,7 +251,7 @@ class ToolRuntime(Protocol):
""" """
... ...
@webmethod(route="/tool-runtime/invoke", method="POST") @webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1)
async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
"""Run a tool with the given arguments. """Run a tool with the given arguments.

View file

@ -9,6 +9,7 @@ from typing import Literal, Protocol, runtime_checkable
from pydantic import BaseModel from pydantic import BaseModel
from llama_stack.apis.resource import Resource, ResourceType from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
@ -65,7 +66,7 @@ class ListVectorDBsResponse(BaseModel):
@runtime_checkable @runtime_checkable
@trace_protocol @trace_protocol
class VectorDBs(Protocol): class VectorDBs(Protocol):
@webmethod(route="/vector-dbs", method="GET") @webmethod(route="/vector-dbs", method="GET", level=LLAMA_STACK_API_V1)
async def list_vector_dbs(self) -> ListVectorDBsResponse: async def list_vector_dbs(self) -> ListVectorDBsResponse:
"""List all vector databases. """List all vector databases.
@ -73,7 +74,7 @@ class VectorDBs(Protocol):
""" """
... ...
@webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET") @webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_vector_db( async def get_vector_db(
self, self,
vector_db_id: str, vector_db_id: str,
@ -85,7 +86,7 @@ class VectorDBs(Protocol):
""" """
... ...
@webmethod(route="/vector-dbs", method="POST") @webmethod(route="/vector-dbs", method="POST", level=LLAMA_STACK_API_V1)
async def register_vector_db( async def register_vector_db(
self, self,
vector_db_id: str, vector_db_id: str,
@ -107,7 +108,7 @@ class VectorDBs(Protocol):
""" """
... ...
@webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE") @webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_vector_db(self, vector_db_id: str) -> None: async def unregister_vector_db(self, vector_db_id: str) -> None:
"""Unregister a vector database. """Unregister a vector database.

View file

@ -15,6 +15,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.inference import InterleavedContent from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
@ -437,7 +438,7 @@ class VectorIO(Protocol):
# this will just block now until chunks are inserted, but it should # this will just block now until chunks are inserted, but it should
# probably return a Job instance which can be polled for completion # probably return a Job instance which can be polled for completion
@webmethod(route="/vector-io/insert", method="POST") @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
async def insert_chunks( async def insert_chunks(
self, self,
vector_db_id: str, vector_db_id: str,
@ -455,7 +456,7 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(route="/vector-io/query", method="POST") @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
async def query_chunks( async def query_chunks(
self, self,
vector_db_id: str, vector_db_id: str,
@ -472,7 +473,7 @@ class VectorIO(Protocol):
... ...
# OpenAI Vector Stores API endpoints # OpenAI Vector Stores API endpoints
@webmethod(route="/openai/v1/vector_stores", method="POST") @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
async def openai_create_vector_store( async def openai_create_vector_store(
self, self,
name: str | None = None, name: str | None = None,
@ -498,7 +499,7 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/vector_stores", method="GET") @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_vector_stores( async def openai_list_vector_stores(
self, self,
limit: int | None = 20, limit: int | None = 20,
@ -516,7 +517,7 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET") @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_vector_store( async def openai_retrieve_vector_store(
self, self,
vector_store_id: str, vector_store_id: str,
@ -528,7 +529,7 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST") @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1)
async def openai_update_vector_store( async def openai_update_vector_store(
self, self,
vector_store_id: str, vector_store_id: str,
@ -546,7 +547,7 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE") @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def openai_delete_vector_store( async def openai_delete_vector_store(
self, self,
vector_store_id: str, vector_store_id: str,
@ -558,7 +559,7 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST") @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1)
async def openai_search_vector_store( async def openai_search_vector_store(
self, self,
vector_store_id: str, vector_store_id: str,
@ -584,7 +585,7 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST") @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1)
async def openai_attach_file_to_vector_store( async def openai_attach_file_to_vector_store(
self, self,
vector_store_id: str, vector_store_id: str,
@ -602,7 +603,7 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET") @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_files_in_vector_store( async def openai_list_files_in_vector_store(
self, self,
vector_store_id: str, vector_store_id: str,
@ -624,7 +625,9 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET") @webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1
)
async def openai_retrieve_vector_store_file( async def openai_retrieve_vector_store_file(
self, self,
vector_store_id: str, vector_store_id: str,
@ -638,7 +641,11 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content", method="GET") @webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
method="GET",
level=LLAMA_STACK_API_V1,
)
async def openai_retrieve_vector_store_file_contents( async def openai_retrieve_vector_store_file_contents(
self, self,
vector_store_id: str, vector_store_id: str,
@ -652,7 +659,9 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST") @webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1
)
async def openai_update_vector_store_file( async def openai_update_vector_store_file(
self, self,
vector_store_id: str, vector_store_id: str,
@ -668,7 +677,9 @@ class VectorIO(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE") @webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1
)
async def openai_delete_vector_store_file( async def openai_delete_vector_store_file(
self, self,
vector_store_id: str, vector_store_id: str,

View file

@ -4,4 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
LLAMA_STACK_API_VERSION = "v1" LLAMA_STACK_API_V1 = "v1"
LLAMA_STACK_API_V1BETA = "v1beta"
LLAMA_STACK_API_V1ALPHA = "v1alpha"

View file

@ -15,7 +15,6 @@ import httpx
from pydantic import BaseModel, parse_obj_as from pydantic import BaseModel, parse_obj_as
from termcolor import cprint from termcolor import cprint
from llama_stack.apis.version import LLAMA_STACK_API_VERSION
from llama_stack.providers.datatypes import RemoteProviderConfig from llama_stack.providers.datatypes import RemoteProviderConfig
_CLIENT_CLASSES = {} _CLIENT_CLASSES = {}
@ -114,7 +113,24 @@ def create_api_client_class(protocol) -> type:
break break
kwargs[param.name] = args[i] kwargs[param.name] = args[i]
url = f"{self.base_url}/{LLAMA_STACK_API_VERSION}/{webmethod.route.lstrip('/')}" # Get all webmethods for this method (supports multiple decorators)
webmethods = getattr(method, "__webmethods__", [])
if not webmethods:
raise RuntimeError(f"Method {method} has no webmethod decorators")
# Choose the preferred webmethod (non-deprecated if available)
preferred_webmethod = None
for wm in webmethods:
if not getattr(wm, "deprecated", False):
preferred_webmethod = wm
break
# If no non-deprecated found, use the first one
if preferred_webmethod is None:
preferred_webmethod = webmethods[0]
url = f"{self.base_url}/{preferred_webmethod.level}/{preferred_webmethod.route.lstrip('/')}"
def convert(value): def convert(value):
if isinstance(value, list): if isinstance(value, list):

View file

@ -14,7 +14,6 @@ from starlette.routing import Route
from llama_stack.apis.datatypes import Api, ExternalApiSpec from llama_stack.apis.datatypes import Api, ExternalApiSpec
from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup
from llama_stack.apis.version import LLAMA_STACK_API_VERSION
from llama_stack.core.resolver import api_protocol_map from llama_stack.core.resolver import api_protocol_map
from llama_stack.schema_utils import WebMethod from llama_stack.schema_utils import WebMethod
@ -54,22 +53,23 @@ def get_all_api_routes(
protocol_methods.append((f"{tool_group.value}.{name}", method)) protocol_methods.append((f"{tool_group.value}.{name}", method))
for name, method in protocol_methods: for name, method in protocol_methods:
if not hasattr(method, "__webmethod__"): # Get all webmethods for this method (supports multiple decorators)
webmethods = getattr(method, "__webmethods__", [])
if not webmethods:
continue continue
# The __webmethod__ attribute is dynamically added by the @webmethod decorator # Create routes for each webmethod decorator
# mypy doesn't know about this dynamic attribute, so we ignore the attr-defined error for webmethod in webmethods:
webmethod = method.__webmethod__ # type: ignore[attr-defined] path = f"/{webmethod.level}/{webmethod.route.lstrip('/')}"
path = f"/{LLAMA_STACK_API_VERSION}/{webmethod.route.lstrip('/')}" if webmethod.method == hdrs.METH_GET:
if webmethod.method == hdrs.METH_GET: http_method = hdrs.METH_GET
http_method = hdrs.METH_GET elif webmethod.method == hdrs.METH_DELETE:
elif webmethod.method == hdrs.METH_DELETE: http_method = hdrs.METH_DELETE
http_method = hdrs.METH_DELETE else:
else: http_method = hdrs.METH_POST
http_method = hdrs.METH_POST routes.append(
routes.append( (Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod)
(Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod) ) # setting endpoint to None since don't use a Router object
) # setting endpoint to None since don't use a Router object
apis[api] = routes apis[api] = routes

View file

@ -45,6 +45,14 @@ class TracingMiddleware:
logger.debug(f"No matching route found for path: {path}, falling back to FastAPI") logger.debug(f"No matching route found for path: {path}, falling back to FastAPI")
return await self.app(scope, receive, send) return await self.app(scope, receive, send)
# Log deprecation warning if route is deprecated
if getattr(webmethod, "deprecated", False):
logger.warning(
f"DEPRECATED ROUTE USED: {scope.get('method', 'GET')} {path} - "
f"This route is deprecated and may be removed in a future version. "
f"Please check the docs for the supported version."
)
trace_attributes = {"__location__": "server", "raw_path": path} trace_attributes = {"__location__": "server", "raw_path": path}
# Extract W3C trace context headers and store as trace attributes # Extract W3C trace context headers and store as trace attributes

View file

@ -13,6 +13,7 @@ from .strong_typing.schema import json_schema_type, register_schema # noqa: F40
@dataclass @dataclass
class WebMethod: class WebMethod:
level: str | None = None
route: str | None = None route: str | None = None
public: bool = False public: bool = False
request_examples: list[Any] | None = None request_examples: list[Any] | None = None
@ -23,6 +24,7 @@ class WebMethod:
descriptive_name: str | None = None descriptive_name: str | None = None
experimental: bool | None = False experimental: bool | None = False
required_scope: str | None = None required_scope: str | None = None
deprecated: bool | None = False
T = TypeVar("T", bound=Callable[..., Any]) T = TypeVar("T", bound=Callable[..., Any])
@ -31,6 +33,7 @@ T = TypeVar("T", bound=Callable[..., Any])
def webmethod( def webmethod(
route: str | None = None, route: str | None = None,
method: str | None = None, method: str | None = None,
level: str | None = None,
public: bool | None = False, public: bool | None = False,
request_examples: list[Any] | None = None, request_examples: list[Any] | None = None,
response_examples: list[Any] | None = None, response_examples: list[Any] | None = None,
@ -38,6 +41,7 @@ def webmethod(
descriptive_name: str | None = None, descriptive_name: str | None = None,
experimental: bool | None = False, experimental: bool | None = False,
required_scope: str | None = None, required_scope: str | None = None,
deprecated: bool | None = False,
) -> Callable[[T], T]: ) -> Callable[[T], T]:
""" """
Decorator that supplies additional metadata to an endpoint operation function. Decorator that supplies additional metadata to an endpoint operation function.
@ -51,9 +55,10 @@ def webmethod(
""" """
def wrap(func: T) -> T: def wrap(func: T) -> T:
func.__webmethod__ = WebMethod( # type: ignore webmethod_obj = WebMethod(
route=route, route=route,
method=method, method=method,
level=level,
public=public or False, public=public or False,
request_examples=request_examples, request_examples=request_examples,
response_examples=response_examples, response_examples=response_examples,
@ -61,7 +66,16 @@ def webmethod(
descriptive_name=descriptive_name, descriptive_name=descriptive_name,
experimental=experimental, experimental=experimental,
required_scope=required_scope, required_scope=required_scope,
deprecated=deprecated,
) )
# Store all webmethods in a list to support multiple decorators
if not hasattr(func, "__webmethods__"):
func.__webmethods__ = [] # type: ignore
func.__webmethods__.append(webmethod_obj) # type: ignore
# Keep the last one as __webmethod__ for backwards compatibility
func.__webmethod__ = webmethod_obj # type: ignore
return func return func
return wrap return wrap

View file

@ -6,6 +6,7 @@
from typing import Protocol from typing import Protocol
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.datatypes import Api, ProviderSpec, RemoteProviderSpec from llama_stack.providers.datatypes import Api, ProviderSpec, RemoteProviderSpec
from llama_stack.schema_utils import webmethod from llama_stack.schema_utils import webmethod
@ -28,7 +29,7 @@ class WeatherProvider(Protocol):
A protocol for the Weather API. A protocol for the Weather API.
""" """
@webmethod(route="/weather/locations", method="GET") @webmethod(route="/weather/locations", method="GET", level=LLAMA_STACK_API_V1)
async def get_available_locations() -> dict[str, list[str]]: async def get_available_locations() -> dict[str, list[str]]:
""" """
Get the available locations. Get the available locations.