mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-03 19:57:35 +00:00
feat: introduce API leveling, post_training to v1alpha
Rather than have a single `LLAMA_STACK_VERSION`, we need to have a `_V1`, `_V1ALPHA`, and `_V1BETA` constant. This also necessitated addition of `level` to the `WebMethod` so that routing can be handeled properly. For backwards compat, the `v1` routes are being kept around and marked as `deprecated`. When used, the server will log a deprecation warning. move: post_training to v1alpha as it is under heavy development and not near its final state eval: job scheduling is not implemented. Relies heavily on the datasetio API which is under development missing implementations of specific routes indicating the structure of those routes might change. Additionally eval depends on the inference API which is going to be deprecated, eval will likely need a major API surface change to conform to using completions properly Signed-off-by: Charlie Doern <cdoern@redhat.com>
This commit is contained in:
parent
a50b63906c
commit
03399cebf3
35 changed files with 1507 additions and 260 deletions
|
@ -16,7 +16,7 @@ import sys
|
|||
import fire
|
||||
import ruamel.yaml as yaml
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_VERSION # noqa: E402
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1 # noqa: E402
|
||||
from llama_stack.core.stack import LlamaStack # noqa: E402
|
||||
|
||||
from .pyopenapi.options import Options # noqa: E402
|
||||
|
@ -25,7 +25,7 @@ from .pyopenapi.utility import Specification, validate_api # noqa: E402
|
|||
|
||||
|
||||
def str_presenter(dumper, data):
|
||||
if data.startswith(f"/{LLAMA_STACK_API_VERSION}") or data.startswith(
|
||||
if data.startswith(f"/{LLAMA_STACK_API_V1}") or data.startswith(
|
||||
"#/components/schemas/"
|
||||
):
|
||||
style = None
|
||||
|
@ -58,7 +58,7 @@ def main(output_dir: str):
|
|||
server=Server(url="http://any-hosted-llama-stack.com"),
|
||||
info=Info(
|
||||
title="Llama Stack Specification",
|
||||
version=LLAMA_STACK_API_VERSION,
|
||||
version=LLAMA_STACK_API_V1,
|
||||
description="""This is the specification of the Llama Stack that provides
|
||||
a set of endpoints and their corresponding interfaces that are tailored to
|
||||
best leverage Llama Models.""",
|
||||
|
|
|
@ -829,7 +829,7 @@ class Generator:
|
|||
else:
|
||||
raise NotImplementedError(f"unknown HTTP method: {op.http_method}")
|
||||
|
||||
route = op.get_route()
|
||||
route = op.get_route(op.webmethod)
|
||||
route = route.replace(":path", "")
|
||||
print(f"route: {route}")
|
||||
if route in paths:
|
||||
|
|
|
@ -11,7 +11,7 @@ import typing
|
|||
from dataclasses import dataclass
|
||||
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_VERSION
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA
|
||||
|
||||
from termcolor import colored
|
||||
|
||||
|
@ -113,11 +113,13 @@ class EndpointOperation:
|
|||
request_examples: Optional[List[Any]] = None
|
||||
response_examples: Optional[List[Any]] = None
|
||||
|
||||
def get_route(self) -> str:
|
||||
if self.route is not None:
|
||||
return "/".join(["", LLAMA_STACK_API_VERSION, self.route.lstrip("/")])
|
||||
def get_route(self, webmethod) -> str:
|
||||
api_level = webmethod.level
|
||||
|
||||
route_parts = ["", LLAMA_STACK_API_VERSION, self.name]
|
||||
if self.route is not None:
|
||||
return "/".join(["", api_level, self.route.lstrip("/")])
|
||||
|
||||
route_parts = ["", api_level, self.name]
|
||||
for param_name, _ in self.path_params:
|
||||
route_parts.append("{" + param_name + "}")
|
||||
return "/".join(route_parts)
|
||||
|
@ -152,33 +154,39 @@ def _get_endpoint_functions(
|
|||
|
||||
functions = inspect.getmembers(endpoint, inspect.isfunction)
|
||||
for func_name, func_ref in functions:
|
||||
webmethod = getattr(func_ref, "__webmethod__", None)
|
||||
if not webmethod:
|
||||
webmethods = []
|
||||
|
||||
# Check for multiple webmethods (stacked decorators)
|
||||
if hasattr(func_ref, "__webmethods__"):
|
||||
webmethods = func_ref.__webmethods__
|
||||
|
||||
if not webmethods:
|
||||
continue
|
||||
|
||||
print(f"Processing {colored(func_name, 'white')}...")
|
||||
operation_name = func_name
|
||||
|
||||
if webmethod.method == "GET":
|
||||
prefix = "get"
|
||||
elif webmethod.method == "DELETE":
|
||||
prefix = "delete"
|
||||
elif webmethod.method == "POST":
|
||||
prefix = "post"
|
||||
elif operation_name.startswith("get_") or operation_name.endswith("/get"):
|
||||
prefix = "get"
|
||||
elif (
|
||||
operation_name.startswith("delete_")
|
||||
or operation_name.startswith("remove_")
|
||||
or operation_name.endswith("/delete")
|
||||
or operation_name.endswith("/remove")
|
||||
):
|
||||
prefix = "delete"
|
||||
else:
|
||||
# by default everything else is a POST
|
||||
prefix = "post"
|
||||
for webmethod in webmethods:
|
||||
print(f"Processing {colored(func_name, 'white')}...")
|
||||
operation_name = func_name
|
||||
|
||||
if webmethod.method == "GET":
|
||||
prefix = "get"
|
||||
elif webmethod.method == "DELETE":
|
||||
prefix = "delete"
|
||||
elif webmethod.method == "POST":
|
||||
prefix = "post"
|
||||
elif operation_name.startswith("get_") or operation_name.endswith("/get"):
|
||||
prefix = "get"
|
||||
elif (
|
||||
operation_name.startswith("delete_")
|
||||
or operation_name.startswith("remove_")
|
||||
or operation_name.endswith("/delete")
|
||||
or operation_name.endswith("/remove")
|
||||
):
|
||||
prefix = "delete"
|
||||
else:
|
||||
# by default everything else is a POST
|
||||
prefix = "post"
|
||||
|
||||
yield prefix, operation_name, func_name, func_ref
|
||||
yield prefix, operation_name, func_name, func_ref
|
||||
|
||||
|
||||
def _get_defining_class(member_fn: str, derived_cls: type) -> type:
|
||||
|
@ -239,105 +247,101 @@ def get_endpoint_operations(
|
|||
"update",
|
||||
],
|
||||
):
|
||||
# extract routing information from function metadata
|
||||
webmethod = getattr(func_ref, "__webmethod__", None)
|
||||
if webmethod is not None:
|
||||
# Get all webmethods for this function
|
||||
webmethods = getattr(func_ref, "__webmethods__", [])
|
||||
|
||||
# Create one EndpointOperation for each webmethod
|
||||
for webmethod in webmethods:
|
||||
route = webmethod.route
|
||||
route_params = _get_route_parameters(route) if route is not None else None
|
||||
public = webmethod.public
|
||||
request_examples = webmethod.request_examples
|
||||
response_examples = webmethod.response_examples
|
||||
else:
|
||||
route = None
|
||||
route_params = None
|
||||
public = False
|
||||
request_examples = None
|
||||
response_examples = None
|
||||
|
||||
# inspect function signature for path and query parameters, and request/response payload type
|
||||
signature = get_signature(func_ref)
|
||||
# inspect function signature for path and query parameters, and request/response payload type
|
||||
signature = get_signature(func_ref)
|
||||
|
||||
path_params = []
|
||||
query_params = []
|
||||
request_params = []
|
||||
multipart_params = []
|
||||
path_params = []
|
||||
query_params = []
|
||||
request_params = []
|
||||
multipart_params = []
|
||||
|
||||
for param_name, parameter in signature.parameters.items():
|
||||
param_type = _get_annotation_type(parameter.annotation, func_ref)
|
||||
for param_name, parameter in signature.parameters.items():
|
||||
param_type = _get_annotation_type(parameter.annotation, func_ref)
|
||||
|
||||
# omit "self" for instance methods
|
||||
if param_name == "self" and param_type is inspect.Parameter.empty:
|
||||
continue
|
||||
# omit "self" for instance methods
|
||||
if param_name == "self" and param_type is inspect.Parameter.empty:
|
||||
continue
|
||||
|
||||
# check if all parameters have explicit type
|
||||
if parameter.annotation is inspect.Parameter.empty:
|
||||
# check if all parameters have explicit type
|
||||
if parameter.annotation is inspect.Parameter.empty:
|
||||
raise ValidationError(
|
||||
f"parameter '{param_name}' in function '{func_name}' has no type annotation"
|
||||
)
|
||||
|
||||
is_multipart = _is_multipart_param(param_type)
|
||||
|
||||
if prefix in ["get", "delete"]:
|
||||
if route_params is not None and param_name in route_params:
|
||||
path_params.append((param_name, param_type))
|
||||
else:
|
||||
query_params.append((param_name, param_type))
|
||||
else:
|
||||
if route_params is not None and param_name in route_params:
|
||||
path_params.append((param_name, param_type))
|
||||
elif is_multipart:
|
||||
multipart_params.append((param_name, param_type))
|
||||
else:
|
||||
request_params.append((param_name, param_type))
|
||||
|
||||
# check if function has explicit return type
|
||||
if signature.return_annotation is inspect.Signature.empty:
|
||||
raise ValidationError(
|
||||
f"parameter '{param_name}' in function '{func_name}' has no type annotation"
|
||||
f"function '{func_name}' has no return type annotation"
|
||||
)
|
||||
|
||||
is_multipart = _is_multipart_param(param_type)
|
||||
|
||||
if prefix in ["get", "delete"]:
|
||||
if route_params is not None and param_name in route_params:
|
||||
path_params.append((param_name, param_type))
|
||||
else:
|
||||
query_params.append((param_name, param_type))
|
||||
return_type = _get_annotation_type(signature.return_annotation, func_ref)
|
||||
|
||||
# operations that produce events are labeled as Generator[YieldType, SendType, ReturnType]
|
||||
# where YieldType is the event type, SendType is None, and ReturnType is the immediate response type to the request
|
||||
if typing.get_origin(return_type) is collections.abc.Generator:
|
||||
event_type, send_type, response_type = typing.get_args(return_type)
|
||||
if send_type is not type(None):
|
||||
raise ValidationError(
|
||||
f"function '{func_name}' has a return type Generator[Y,S,R] and therefore looks like an event but has an explicit send type"
|
||||
)
|
||||
else:
|
||||
if route_params is not None and param_name in route_params:
|
||||
path_params.append((param_name, param_type))
|
||||
elif is_multipart:
|
||||
multipart_params.append((param_name, param_type))
|
||||
event_type = None
|
||||
|
||||
def process_type(t):
|
||||
if typing.get_origin(t) is collections.abc.AsyncIterator:
|
||||
# NOTE(ashwin): this is SSE and there is no way to represent it. either we make it a List
|
||||
# or the item type. I am choosing it to be the latter
|
||||
args = typing.get_args(t)
|
||||
return args[0]
|
||||
elif typing.get_origin(t) is typing.Union:
|
||||
types = [process_type(a) for a in typing.get_args(t)]
|
||||
return typing._UnionGenericAlias(typing.Union, tuple(types))
|
||||
else:
|
||||
return t
|
||||
|
||||
response_type = process_type(return_type)
|
||||
|
||||
if prefix in ["delete", "remove"]:
|
||||
http_method = HTTPMethod.DELETE
|
||||
elif prefix == "post":
|
||||
http_method = HTTPMethod.POST
|
||||
elif prefix == "get":
|
||||
http_method = HTTPMethod.GET
|
||||
elif prefix == "set":
|
||||
http_method = HTTPMethod.PUT
|
||||
elif prefix == "update":
|
||||
http_method = HTTPMethod.PATCH
|
||||
else:
|
||||
request_params.append((param_name, param_type))
|
||||
raise ValidationError(f"unknown prefix {prefix}")
|
||||
|
||||
# check if function has explicit return type
|
||||
if signature.return_annotation is inspect.Signature.empty:
|
||||
raise ValidationError(
|
||||
f"function '{func_name}' has no return type annotation"
|
||||
)
|
||||
|
||||
return_type = _get_annotation_type(signature.return_annotation, func_ref)
|
||||
|
||||
# operations that produce events are labeled as Generator[YieldType, SendType, ReturnType]
|
||||
# where YieldType is the event type, SendType is None, and ReturnType is the immediate response type to the request
|
||||
if typing.get_origin(return_type) is collections.abc.Generator:
|
||||
event_type, send_type, response_type = typing.get_args(return_type)
|
||||
if send_type is not type(None):
|
||||
raise ValidationError(
|
||||
f"function '{func_name}' has a return type Generator[Y,S,R] and therefore looks like an event but has an explicit send type"
|
||||
)
|
||||
else:
|
||||
event_type = None
|
||||
|
||||
def process_type(t):
|
||||
if typing.get_origin(t) is collections.abc.AsyncIterator:
|
||||
# NOTE(ashwin): this is SSE and there is no way to represent it. either we make it a List
|
||||
# or the item type. I am choosing it to be the latter
|
||||
args = typing.get_args(t)
|
||||
return args[0]
|
||||
elif typing.get_origin(t) is typing.Union:
|
||||
types = [process_type(a) for a in typing.get_args(t)]
|
||||
return typing._UnionGenericAlias(typing.Union, tuple(types))
|
||||
else:
|
||||
return t
|
||||
|
||||
response_type = process_type(return_type)
|
||||
|
||||
if prefix in ["delete", "remove"]:
|
||||
http_method = HTTPMethod.DELETE
|
||||
elif prefix == "post":
|
||||
http_method = HTTPMethod.POST
|
||||
elif prefix == "get":
|
||||
http_method = HTTPMethod.GET
|
||||
elif prefix == "set":
|
||||
http_method = HTTPMethod.PUT
|
||||
elif prefix == "update":
|
||||
http_method = HTTPMethod.PATCH
|
||||
else:
|
||||
raise ValidationError(f"unknown prefix {prefix}")
|
||||
|
||||
result.append(
|
||||
EndpointOperation(
|
||||
# Create an EndpointOperation for this specific webmethod
|
||||
operation = EndpointOperation(
|
||||
defining_class=_get_defining_class(func_name, endpoint),
|
||||
name=operation_name,
|
||||
func_name=func_name,
|
||||
|
@ -354,7 +358,10 @@ def get_endpoint_operations(
|
|||
request_examples=request_examples if use_examples else None,
|
||||
response_examples=response_examples if use_examples else None,
|
||||
)
|
||||
)
|
||||
|
||||
# Store the specific webmethod with this operation
|
||||
operation.webmethod = webmethod
|
||||
result.append(operation)
|
||||
|
||||
if not result:
|
||||
raise ValidationError(f"no eligible endpoint operations in type {endpoint}")
|
||||
|
|
653
docs/static/llama-stack-spec.html
vendored
653
docs/static/llama-stack-spec.html
vendored
|
@ -175,6 +175,43 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1alpha/post-training/job/cancel": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"PostTraining (Coming Soon)"
|
||||
],
|
||||
"summary": "Cancel a training job.",
|
||||
"description": "Cancel a training job.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/CancelTrainingJobRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/post-training/job/cancel": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
@ -1179,6 +1216,60 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "EvaluateResponse object containing generations and scores.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/EvaluateResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"summary": "Evaluate a list of rows on a benchmark.",
|
||||
"description": "Evaluate a list of rows on a benchmark.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/EvaluateRowsRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/eval/benchmarks/{benchmark_id}/evaluations": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
@ -1366,6 +1457,85 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1alpha/eval/benchmarks/{benchmark_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A Benchmark.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Benchmark"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Benchmarks"
|
||||
],
|
||||
"summary": "Get a benchmark by its ID.",
|
||||
"description": "Get a benchmark by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to get.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"delete": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Benchmarks"
|
||||
],
|
||||
"summary": "Unregister a benchmark.",
|
||||
"description": "Unregister a benchmark.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to unregister.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/eval/benchmarks/{benchmark_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -2079,6 +2249,50 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1alpha/post-training/job/artifacts": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A PostTrainingJobArtifactsResponse.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"PostTraining (Coming Soon)"
|
||||
],
|
||||
"summary": "Get the artifacts of a training job.",
|
||||
"description": "Get the artifacts of a training job.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "job_uuid",
|
||||
"in": "query",
|
||||
"description": "The UUID of the job to get the artifacts of.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/post-training/job/artifacts": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -2123,6 +2337,50 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1alpha/post-training/job/status": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A PostTrainingJobStatusResponse.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/PostTrainingJobStatusResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"PostTraining (Coming Soon)"
|
||||
],
|
||||
"summary": "Get the status of a training job.",
|
||||
"description": "Get the status of a training job.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "job_uuid",
|
||||
"in": "query",
|
||||
"description": "The UUID of the job to get the status of.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/post-training/job/status": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -2167,6 +2425,40 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1alpha/post-training/jobs": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A ListPostTrainingJobsResponse.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ListPostTrainingJobsResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"PostTraining (Coming Soon)"
|
||||
],
|
||||
"summary": "Get all training jobs.",
|
||||
"description": "Get all training jobs.",
|
||||
"parameters": []
|
||||
}
|
||||
},
|
||||
"/v1/post-training/jobs": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -2538,6 +2830,103 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The status of the evaluation job.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Job"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"summary": "Get the status of a job.",
|
||||
"description": "Get the status of a job.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the job to get the status of.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"delete": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"summary": "Cancel a job.",
|
||||
"description": "Cancel a job.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the job to cancel.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -2635,6 +3024,59 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The result of the job.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/EvaluateResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"summary": "Get the result of a job.",
|
||||
"description": "Get the result of a job.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the job to get the result of.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -2750,6 +3192,75 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1alpha/eval/benchmarks": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A ListBenchmarksResponse.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ListBenchmarksResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Benchmarks"
|
||||
],
|
||||
"summary": "List all benchmarks.",
|
||||
"description": "List all benchmarks.",
|
||||
"parameters": []
|
||||
},
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Benchmarks"
|
||||
],
|
||||
"summary": "Register a benchmark.",
|
||||
"description": "Register a benchmark.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/RegisterBenchmarkRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/eval/benchmarks": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -4783,6 +5294,50 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1alpha/post-training/preference-optimize": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A PostTrainingJob.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/PostTrainingJob"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"PostTraining (Coming Soon)"
|
||||
],
|
||||
"summary": "Run preference optimization of a model.",
|
||||
"description": "Run preference optimization of a model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/PreferenceOptimizeRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/post-training/preference-optimize": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
@ -5178,6 +5733,60 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1alpha/eval/benchmarks/{benchmark_id}/jobs": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The job that was created to run the evaluation.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Job"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"summary": "Run an evaluation on a benchmark.",
|
||||
"description": "Run an evaluation on a benchmark.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/RunEvalRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/eval/benchmarks/{benchmark_id}/jobs": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
@ -5499,6 +6108,50 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1alpha/post-training/supervised-fine-tune": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A PostTrainingJob.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/PostTrainingJob"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"PostTraining (Coming Soon)"
|
||||
],
|
||||
"summary": "Run supervised fine-tuning of a model.",
|
||||
"description": "Run supervised fine-tuning of a model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/SupervisedFineTuneRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/post-training/supervised-fine-tune": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
|
458
docs/static/llama-stack-spec.yaml
vendored
458
docs/static/llama-stack-spec.yaml
vendored
|
@ -109,6 +109,32 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/BatchCompletionRequest'
|
||||
required: true
|
||||
/v1alpha/post-training/job/cancel:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- PostTraining (Coming Soon)
|
||||
summary: Cancel a training job.
|
||||
description: Cancel a training job.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CancelTrainingJobRequest'
|
||||
required: true
|
||||
/v1/post-training/job/cancel:
|
||||
post:
|
||||
responses:
|
||||
|
@ -832,6 +858,44 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/EmbeddingsRequest'
|
||||
required: true
|
||||
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
EvaluateResponse object containing generations and scores.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/EvaluateResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Eval
|
||||
summary: Evaluate a list of rows on a benchmark.
|
||||
description: Evaluate a list of rows on a benchmark.
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the benchmark to run the evaluation on.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/EvaluateRowsRequest'
|
||||
required: true
|
||||
/v1/eval/benchmarks/{benchmark_id}/evaluations:
|
||||
post:
|
||||
responses:
|
||||
|
@ -962,6 +1026,61 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1alpha/eval/benchmarks/{benchmark_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A Benchmark.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Benchmark'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Get a benchmark by its ID.
|
||||
description: Get a benchmark by its ID.
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
description: The ID of the benchmark to get.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
delete:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Unregister a benchmark.
|
||||
description: Unregister a benchmark.
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
description: The ID of the benchmark to unregister.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/eval/benchmarks/{benchmark_id}:
|
||||
get:
|
||||
responses:
|
||||
|
@ -1458,6 +1577,37 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1alpha/post-training/job/artifacts:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A PostTrainingJobArtifactsResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- PostTraining (Coming Soon)
|
||||
summary: Get the artifacts of a training job.
|
||||
description: Get the artifacts of a training job.
|
||||
parameters:
|
||||
- name: job_uuid
|
||||
in: query
|
||||
description: >-
|
||||
The UUID of the job to get the artifacts of.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/post-training/job/artifacts:
|
||||
get:
|
||||
responses:
|
||||
|
@ -1489,6 +1639,37 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1alpha/post-training/job/status:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A PostTrainingJobStatusResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PostTrainingJobStatusResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- PostTraining (Coming Soon)
|
||||
summary: Get the status of a training job.
|
||||
description: Get the status of a training job.
|
||||
parameters:
|
||||
- name: job_uuid
|
||||
in: query
|
||||
description: >-
|
||||
The UUID of the job to get the status of.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/post-training/job/status:
|
||||
get:
|
||||
responses:
|
||||
|
@ -1520,6 +1701,30 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1alpha/post-training/jobs:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A ListPostTrainingJobsResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ListPostTrainingJobsResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- PostTraining (Coming Soon)
|
||||
summary: Get all training jobs.
|
||||
description: Get all training jobs.
|
||||
parameters: []
|
||||
/v1/post-training/jobs:
|
||||
get:
|
||||
responses:
|
||||
|
@ -1804,6 +2009,75 @@ paths:
|
|||
required: false
|
||||
schema:
|
||||
type: integer
|
||||
/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: The status of the evaluation job.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Job'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Eval
|
||||
summary: Get the status of a job.
|
||||
description: Get the status of a job.
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the benchmark to run the evaluation on.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: job_id
|
||||
in: path
|
||||
description: The ID of the job to get the status of.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
delete:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Eval
|
||||
summary: Cancel a job.
|
||||
description: Cancel a job.
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the benchmark to run the evaluation on.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: job_id
|
||||
in: path
|
||||
description: The ID of the job to cancel.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
|
||||
get:
|
||||
responses:
|
||||
|
@ -1873,6 +2147,43 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: The result of the job.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/EvaluateResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Eval
|
||||
summary: Get the result of a job.
|
||||
description: Get the result of a job.
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the benchmark to run the evaluation on.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: job_id
|
||||
in: path
|
||||
description: The ID of the job to get the result of.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result:
|
||||
get:
|
||||
responses:
|
||||
|
@ -1953,6 +2264,55 @@ paths:
|
|||
required: false
|
||||
schema:
|
||||
type: integer
|
||||
/v1alpha/eval/benchmarks:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A ListBenchmarksResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ListBenchmarksResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: List all benchmarks.
|
||||
description: List all benchmarks.
|
||||
parameters: []
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Register a benchmark.
|
||||
description: Register a benchmark.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RegisterBenchmarkRequest'
|
||||
required: true
|
||||
/v1/eval/benchmarks:
|
||||
get:
|
||||
responses:
|
||||
|
@ -3454,6 +3814,36 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/OpenaiSearchVectorStoreRequest'
|
||||
required: true
|
||||
/v1alpha/post-training/preference-optimize:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A PostTrainingJob.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PostTrainingJob'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- PostTraining (Coming Soon)
|
||||
summary: Run preference optimization of a model.
|
||||
description: Run preference optimization of a model.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PreferenceOptimizeRequest'
|
||||
required: true
|
||||
/v1/post-training/preference-optimize:
|
||||
post:
|
||||
responses:
|
||||
|
@ -3735,6 +4125,44 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/ResumeAgentTurnRequest'
|
||||
required: true
|
||||
/v1alpha/eval/benchmarks/{benchmark_id}/jobs:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
The job that was created to run the evaluation.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Job'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Eval
|
||||
summary: Run an evaluation on a benchmark.
|
||||
description: Run an evaluation on a benchmark.
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the benchmark to run the evaluation on.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RunEvalRequest'
|
||||
required: true
|
||||
/v1/eval/benchmarks/{benchmark_id}/jobs:
|
||||
post:
|
||||
responses:
|
||||
|
@ -3961,6 +4389,36 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/SetDefaultVersionRequest'
|
||||
required: true
|
||||
/v1alpha/post-training/supervised-fine-tune:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A PostTrainingJob.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PostTrainingJob'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- PostTraining (Coming Soon)
|
||||
summary: Run supervised fine-tuning of a model.
|
||||
description: Run supervised fine-tuning of a model.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/SupervisedFineTuneRequest'
|
||||
required: true
|
||||
/v1/post-training/supervised-fine-tune:
|
||||
post:
|
||||
responses:
|
||||
|
|
|
@ -27,6 +27,7 @@ from llama_stack.apis.inference import (
|
|||
)
|
||||
from llama_stack.apis.safety import SafetyViolation
|
||||
from llama_stack.apis.tools import ToolDef
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
from .openai_responses import (
|
||||
|
@ -481,7 +482,7 @@ class Agents(Protocol):
|
|||
- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
|
||||
"""
|
||||
|
||||
@webmethod(route="/agents", method="POST", descriptive_name="create_agent")
|
||||
@webmethod(route="/agents", method="POST", descriptive_name="create_agent", level=LLAMA_STACK_API_V1)
|
||||
async def create_agent(
|
||||
self,
|
||||
agent_config: AgentConfig,
|
||||
|
@ -494,7 +495,10 @@ class Agents(Protocol):
|
|||
...
|
||||
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}/turn", method="POST", descriptive_name="create_agent_turn"
|
||||
route="/agents/{agent_id}/session/{session_id}/turn",
|
||||
method="POST",
|
||||
descriptive_name="create_agent_turn",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def create_agent_turn(
|
||||
self,
|
||||
|
@ -524,6 +528,7 @@ class Agents(Protocol):
|
|||
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
|
||||
method="POST",
|
||||
descriptive_name="resume_agent_turn",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def resume_agent_turn(
|
||||
self,
|
||||
|
@ -549,6 +554,7 @@ class Agents(Protocol):
|
|||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def get_agents_turn(
|
||||
self,
|
||||
|
@ -568,6 +574,7 @@ class Agents(Protocol):
|
|||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def get_agents_step(
|
||||
self,
|
||||
|
@ -586,7 +593,12 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/agents/{agent_id}/session", method="POST", descriptive_name="create_agent_session")
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session",
|
||||
method="POST",
|
||||
descriptive_name="create_agent_session",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def create_agent_session(
|
||||
self,
|
||||
agent_id: str,
|
||||
|
@ -600,7 +612,7 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET")
|
||||
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_agents_session(
|
||||
self,
|
||||
session_id: str,
|
||||
|
@ -616,7 +628,7 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE")
|
||||
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def delete_agents_session(
|
||||
self,
|
||||
session_id: str,
|
||||
|
@ -629,7 +641,7 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/agents/{agent_id}", method="DELETE")
|
||||
@webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def delete_agent(
|
||||
self,
|
||||
agent_id: str,
|
||||
|
@ -640,7 +652,7 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/agents", method="GET")
|
||||
@webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
|
||||
"""List all agents.
|
||||
|
||||
|
@ -650,7 +662,7 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/agents/{agent_id}", method="GET")
|
||||
@webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_agent(self, agent_id: str) -> Agent:
|
||||
"""Describe an agent by its ID.
|
||||
|
||||
|
@ -659,7 +671,7 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/agents/{agent_id}/sessions", method="GET")
|
||||
@webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_agent_sessions(
|
||||
self,
|
||||
agent_id: str,
|
||||
|
@ -682,7 +694,7 @@ class Agents(Protocol):
|
|||
#
|
||||
# Both of these APIs are inherently stateful.
|
||||
|
||||
@webmethod(route="/openai/v1/responses/{response_id}", method="GET")
|
||||
@webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_openai_response(
|
||||
self,
|
||||
response_id: str,
|
||||
|
@ -694,7 +706,7 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/responses", method="POST")
|
||||
@webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_openai_response(
|
||||
self,
|
||||
input: str | list[OpenAIResponseInput],
|
||||
|
@ -719,7 +731,7 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/responses", method="GET")
|
||||
@webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_openai_responses(
|
||||
self,
|
||||
after: str | None = None,
|
||||
|
@ -737,7 +749,7 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET")
|
||||
@webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_openai_response_input_items(
|
||||
self,
|
||||
response_id: str,
|
||||
|
@ -759,7 +771,7 @@ class Agents(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE")
|
||||
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
|
||||
"""Delete an OpenAI response by its ID.
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@ from llama_stack.apis.inference import (
|
|||
ToolDefinition,
|
||||
ToolPromptFormat,
|
||||
)
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import webmethod
|
||||
|
||||
|
||||
|
@ -30,7 +31,7 @@ class BatchInference(Protocol):
|
|||
including (post-training, evals, etc).
|
||||
"""
|
||||
|
||||
@webmethod(route="/batch-inference/completion", method="POST")
|
||||
@webmethod(route="/batch-inference/completion", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def completion(
|
||||
self,
|
||||
model: str,
|
||||
|
@ -50,7 +51,7 @@ class BatchInference(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/batch-inference/chat-completion", method="POST")
|
||||
@webmethod(route="/batch-inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def chat_completion(
|
||||
self,
|
||||
model: str,
|
||||
|
|
|
@ -8,6 +8,7 @@ from typing import Literal, Protocol, runtime_checkable
|
|||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
try:
|
||||
|
@ -42,7 +43,7 @@ class Batches(Protocol):
|
|||
Note: This API is currently under active development and may undergo changes.
|
||||
"""
|
||||
|
||||
@webmethod(route="/openai/v1/batches", method="POST")
|
||||
@webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_batch(
|
||||
self,
|
||||
input_file_id: str,
|
||||
|
@ -62,7 +63,7 @@ class Batches(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/batches/{batch_id}", method="GET")
|
||||
@webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def retrieve_batch(self, batch_id: str) -> BatchObject:
|
||||
"""Retrieve information about a specific batch.
|
||||
|
||||
|
@ -71,7 +72,7 @@ class Batches(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST")
|
||||
@webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def cancel_batch(self, batch_id: str) -> BatchObject:
|
||||
"""Cancel a batch that is in progress.
|
||||
|
||||
|
@ -80,7 +81,7 @@ class Batches(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/batches", method="GET")
|
||||
@webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_batches(
|
||||
self,
|
||||
after: str | None = None,
|
||||
|
|
|
@ -8,6 +8,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
|
|||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
|
@ -53,7 +54,8 @@ class ListBenchmarksResponse(BaseModel):
|
|||
|
||||
@runtime_checkable
|
||||
class Benchmarks(Protocol):
|
||||
@webmethod(route="/eval/benchmarks", method="GET")
|
||||
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def list_benchmarks(self) -> ListBenchmarksResponse:
|
||||
"""List all benchmarks.
|
||||
|
||||
|
@ -61,7 +63,8 @@ class Benchmarks(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET")
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def get_benchmark(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
|
@ -73,7 +76,8 @@ class Benchmarks(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks", method="POST")
|
||||
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def register_benchmark(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
|
@ -94,7 +98,8 @@ class Benchmarks(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE")
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def unregister_benchmark(self, benchmark_id: str) -> None:
|
||||
"""Unregister a benchmark.
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable
|
|||
|
||||
from llama_stack.apis.common.responses import PaginatedResponse
|
||||
from llama_stack.apis.datasets import Dataset
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import webmethod
|
||||
|
||||
|
||||
|
@ -20,7 +21,7 @@ class DatasetIO(Protocol):
|
|||
# keeping for aligning with inference/safety, but this is not used
|
||||
dataset_store: DatasetStore
|
||||
|
||||
@webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET")
|
||||
@webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def iterrows(
|
||||
self,
|
||||
dataset_id: str,
|
||||
|
@ -44,7 +45,7 @@ class DatasetIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST")
|
||||
@webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
|
||||
"""Append rows to a dataset.
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ from typing import Annotated, Any, Literal, Protocol
|
|||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
||||
|
@ -145,7 +146,7 @@ class ListDatasetsResponse(BaseModel):
|
|||
|
||||
|
||||
class Datasets(Protocol):
|
||||
@webmethod(route="/datasets", method="POST")
|
||||
@webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def register_dataset(
|
||||
self,
|
||||
purpose: DatasetPurpose,
|
||||
|
@ -214,7 +215,7 @@ class Datasets(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="GET")
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_dataset(
|
||||
self,
|
||||
dataset_id: str,
|
||||
|
@ -226,7 +227,7 @@ class Datasets(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasets", method="GET")
|
||||
@webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_datasets(self) -> ListDatasetsResponse:
|
||||
"""List all datasets.
|
||||
|
||||
|
@ -234,7 +235,7 @@ class Datasets(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE")
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def unregister_dataset(
|
||||
self,
|
||||
dataset_id: str,
|
||||
|
|
|
@ -13,6 +13,7 @@ from llama_stack.apis.common.job_types import Job
|
|||
from llama_stack.apis.inference import SamplingParams, SystemMessage
|
||||
from llama_stack.apis.scoring import ScoringResult
|
||||
from llama_stack.apis.scoring_functions import ScoringFnParams
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
||||
|
@ -83,7 +84,8 @@ class EvaluateResponse(BaseModel):
|
|||
class Eval(Protocol):
|
||||
"""Llama Stack Evaluation API for running evaluations on model and agent candidates."""
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def run_eval(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
|
@ -97,7 +99,10 @@ class Eval(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
|
||||
@webmethod(
|
||||
route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
|
||||
)
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def evaluate_rows(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
|
@ -115,7 +120,10 @@ class Eval(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
|
||||
@webmethod(
|
||||
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
|
||||
)
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def job_status(self, benchmark_id: str, job_id: str) -> Job:
|
||||
"""Get the status of a job.
|
||||
|
||||
|
@ -125,7 +133,13 @@ class Eval(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
|
||||
@webmethod(
|
||||
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
|
||||
method="DELETE",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
|
||||
"""Cancel a job.
|
||||
|
||||
|
@ -134,7 +148,15 @@ class Eval(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
|
||||
@webmethod(
|
||||
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
deprecated=True,
|
||||
)
|
||||
@webmethod(
|
||||
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA
|
||||
)
|
||||
async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
|
||||
"""Get the result of a job.
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@ from fastapi import File, Form, Response, UploadFile
|
|||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.responses import Order
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
@ -104,7 +105,7 @@ class OpenAIFileDeleteResponse(BaseModel):
|
|||
@trace_protocol
|
||||
class Files(Protocol):
|
||||
# OpenAI Files API Endpoints
|
||||
@webmethod(route="/openai/v1/files", method="POST")
|
||||
@webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_upload_file(
|
||||
self,
|
||||
file: Annotated[UploadFile, File()],
|
||||
|
@ -127,7 +128,7 @@ class Files(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/files", method="GET")
|
||||
@webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_list_files(
|
||||
self,
|
||||
after: str | None = None,
|
||||
|
@ -146,7 +147,7 @@ class Files(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/files/{file_id}", method="GET")
|
||||
@webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_retrieve_file(
|
||||
self,
|
||||
file_id: str,
|
||||
|
@ -159,7 +160,7 @@ class Files(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/files/{file_id}", method="DELETE")
|
||||
@webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def openai_delete_file(
|
||||
self,
|
||||
file_id: str,
|
||||
|
@ -172,7 +173,7 @@ class Files(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/files/{file_id}/content", method="GET")
|
||||
@webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_retrieve_file_content(
|
||||
self,
|
||||
file_id: str,
|
||||
|
|
|
@ -21,6 +21,7 @@ from llama_stack.apis.common.content_types import ContentDelta, InterleavedConte
|
|||
from llama_stack.apis.common.responses import Order
|
||||
from llama_stack.apis.models import Model
|
||||
from llama_stack.apis.telemetry import MetricResponseMixin
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.models.llama.datatypes import (
|
||||
BuiltinTool,
|
||||
StopReason,
|
||||
|
@ -1026,7 +1027,7 @@ class InferenceProvider(Protocol):
|
|||
|
||||
model_store: ModelStore | None = None
|
||||
|
||||
@webmethod(route="/inference/completion", method="POST")
|
||||
@webmethod(route="/inference/completion", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def completion(
|
||||
self,
|
||||
model_id: str,
|
||||
|
@ -1049,7 +1050,7 @@ class InferenceProvider(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/inference/batch-completion", method="POST", experimental=True)
|
||||
@webmethod(route="/inference/batch-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
|
||||
async def batch_completion(
|
||||
self,
|
||||
model_id: str,
|
||||
|
@ -1070,7 +1071,7 @@ class InferenceProvider(Protocol):
|
|||
raise NotImplementedError("Batch completion is not implemented")
|
||||
return # this is so mypy's safe-super rule will consider the method concrete
|
||||
|
||||
@webmethod(route="/inference/chat-completion", method="POST")
|
||||
@webmethod(route="/inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def chat_completion(
|
||||
self,
|
||||
model_id: str,
|
||||
|
@ -1110,7 +1111,7 @@ class InferenceProvider(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True)
|
||||
@webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
|
||||
async def batch_chat_completion(
|
||||
self,
|
||||
model_id: str,
|
||||
|
@ -1135,7 +1136,7 @@ class InferenceProvider(Protocol):
|
|||
raise NotImplementedError("Batch chat completion is not implemented")
|
||||
return # this is so mypy's safe-super rule will consider the method concrete
|
||||
|
||||
@webmethod(route="/inference/embeddings", method="POST")
|
||||
@webmethod(route="/inference/embeddings", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
|
@ -1155,7 +1156,7 @@ class InferenceProvider(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/inference/rerank", method="POST", experimental=True)
|
||||
@webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
|
||||
async def rerank(
|
||||
self,
|
||||
model: str,
|
||||
|
@ -1174,7 +1175,7 @@ class InferenceProvider(Protocol):
|
|||
raise NotImplementedError("Reranking is not implemented")
|
||||
return # this is so mypy's safe-super rule will consider the method concrete
|
||||
|
||||
@webmethod(route="/openai/v1/completions", method="POST")
|
||||
@webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_completion(
|
||||
self,
|
||||
# Standard OpenAI completion parameters
|
||||
|
@ -1225,7 +1226,7 @@ class InferenceProvider(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/chat/completions", method="POST")
|
||||
@webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_chat_completion(
|
||||
self,
|
||||
model: str,
|
||||
|
@ -1281,7 +1282,7 @@ class InferenceProvider(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/embeddings", method="POST")
|
||||
@webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
|
@ -1310,7 +1311,7 @@ class Inference(InferenceProvider):
|
|||
- Embedding models: these models generate embeddings to be used for semantic search.
|
||||
"""
|
||||
|
||||
@webmethod(route="/openai/v1/chat/completions", method="GET")
|
||||
@webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_chat_completions(
|
||||
self,
|
||||
after: str | None = None,
|
||||
|
@ -1328,7 +1329,7 @@ class Inference(InferenceProvider):
|
|||
"""
|
||||
raise NotImplementedError("List chat completions is not implemented")
|
||||
|
||||
@webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET")
|
||||
@webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
|
||||
"""Describe a chat completion by its ID.
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ from typing import Protocol, runtime_checkable
|
|||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.datatypes import HealthStatus
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
@ -57,7 +58,7 @@ class ListRoutesResponse(BaseModel):
|
|||
|
||||
@runtime_checkable
|
||||
class Inspect(Protocol):
|
||||
@webmethod(route="/inspect/routes", method="GET")
|
||||
@webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_routes(self) -> ListRoutesResponse:
|
||||
"""List all available API routes with their methods and implementing providers.
|
||||
|
||||
|
@ -65,7 +66,7 @@ class Inspect(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/health", method="GET")
|
||||
@webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def health(self) -> HealthInfo:
|
||||
"""Get the current health status of the service.
|
||||
|
||||
|
@ -73,7 +74,7 @@ class Inspect(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/version", method="GET")
|
||||
@webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def version(self) -> VersionInfo:
|
||||
"""Get the version of the service.
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
|
|||
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
@ -102,7 +103,7 @@ class OpenAIListModelsResponse(BaseModel):
|
|||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Models(Protocol):
|
||||
@webmethod(route="/models", method="GET")
|
||||
@webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_models(self) -> ListModelsResponse:
|
||||
"""List all models.
|
||||
|
||||
|
@ -110,7 +111,7 @@ class Models(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/models", method="GET")
|
||||
@webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_list_models(self) -> OpenAIListModelsResponse:
|
||||
"""List models using the OpenAI API.
|
||||
|
||||
|
@ -118,7 +119,7 @@ class Models(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/models/{model_id:path}", method="GET")
|
||||
@webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_model(
|
||||
self,
|
||||
model_id: str,
|
||||
|
@ -130,7 +131,7 @@ class Models(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/models", method="POST")
|
||||
@webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def register_model(
|
||||
self,
|
||||
model_id: str,
|
||||
|
@ -150,7 +151,7 @@ class Models(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/models/{model_id:path}", method="DELETE")
|
||||
@webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def unregister_model(
|
||||
self,
|
||||
model_id: str,
|
||||
|
|
|
@ -13,6 +13,7 @@ from pydantic import BaseModel, Field
|
|||
from llama_stack.apis.common.content_types import URL
|
||||
from llama_stack.apis.common.job_types import JobStatus
|
||||
from llama_stack.apis.common.training_types import Checkpoint
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
||||
|
@ -283,7 +284,8 @@ class PostTrainingJobArtifactsResponse(BaseModel):
|
|||
|
||||
|
||||
class PostTraining(Protocol):
|
||||
@webmethod(route="/post-training/supervised-fine-tune", method="POST")
|
||||
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def supervised_fine_tune(
|
||||
self,
|
||||
job_uuid: str,
|
||||
|
@ -310,7 +312,8 @@ class PostTraining(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/post-training/preference-optimize", method="POST")
|
||||
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def preference_optimize(
|
||||
self,
|
||||
job_uuid: str,
|
||||
|
@ -332,7 +335,8 @@ class PostTraining(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/post-training/jobs", method="GET")
|
||||
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
|
||||
"""Get all training jobs.
|
||||
|
||||
|
@ -340,7 +344,8 @@ class PostTraining(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/post-training/job/status", method="GET")
|
||||
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse:
|
||||
"""Get the status of a training job.
|
||||
|
||||
|
@ -349,7 +354,8 @@ class PostTraining(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/post-training/job/cancel", method="POST")
|
||||
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def cancel_training_job(self, job_uuid: str) -> None:
|
||||
"""Cancel a training job.
|
||||
|
||||
|
@ -357,7 +363,8 @@ class PostTraining(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/post-training/job/artifacts", method="GET")
|
||||
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
|
||||
"""Get the artifacts of a training job.
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ from typing import Protocol, runtime_checkable
|
|||
|
||||
from pydantic import BaseModel, Field, field_validator, model_validator
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
@ -95,7 +96,7 @@ class ListPromptsResponse(BaseModel):
|
|||
class Prompts(Protocol):
|
||||
"""Protocol for prompt management operations."""
|
||||
|
||||
@webmethod(route="/prompts", method="GET")
|
||||
@webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_prompts(self) -> ListPromptsResponse:
|
||||
"""List all prompts.
|
||||
|
||||
|
@ -103,7 +104,7 @@ class Prompts(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts/{prompt_id}/versions", method="GET")
|
||||
@webmethod(route="/prompts/{prompt_id}/versions", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_prompt_versions(
|
||||
self,
|
||||
prompt_id: str,
|
||||
|
@ -115,7 +116,7 @@ class Prompts(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts/{prompt_id}", method="GET")
|
||||
@webmethod(route="/prompts/{prompt_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_prompt(
|
||||
self,
|
||||
prompt_id: str,
|
||||
|
@ -129,7 +130,7 @@ class Prompts(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts", method="POST")
|
||||
@webmethod(route="/prompts", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_prompt(
|
||||
self,
|
||||
prompt: str,
|
||||
|
@ -143,7 +144,7 @@ class Prompts(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts/{prompt_id}", method="PUT")
|
||||
@webmethod(route="/prompts/{prompt_id}", method="PUT", level=LLAMA_STACK_API_V1)
|
||||
async def update_prompt(
|
||||
self,
|
||||
prompt_id: str,
|
||||
|
@ -163,7 +164,7 @@ class Prompts(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts/{prompt_id}", method="DELETE")
|
||||
@webmethod(route="/prompts/{prompt_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def delete_prompt(
|
||||
self,
|
||||
prompt_id: str,
|
||||
|
@ -174,7 +175,7 @@ class Prompts(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT")
|
||||
@webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT", level=LLAMA_STACK_API_V1)
|
||||
async def set_default_version(
|
||||
self,
|
||||
prompt_id: str,
|
||||
|
|
|
@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable
|
|||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.datatypes import HealthResponse
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
@ -45,7 +46,7 @@ class Providers(Protocol):
|
|||
Providers API for inspecting, listing, and modifying providers and their configurations.
|
||||
"""
|
||||
|
||||
@webmethod(route="/providers", method="GET")
|
||||
@webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_providers(self) -> ListProvidersResponse:
|
||||
"""List all available providers.
|
||||
|
||||
|
@ -53,7 +54,7 @@ class Providers(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/providers/{provider_id}", method="GET")
|
||||
@webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def inspect_provider(self, provider_id: str) -> ProviderInfo:
|
||||
"""Get detailed information about a specific provider.
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@ from pydantic import BaseModel, Field
|
|||
|
||||
from llama_stack.apis.inference import Message
|
||||
from llama_stack.apis.shields import Shield
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
@ -97,7 +98,7 @@ class ShieldStore(Protocol):
|
|||
class Safety(Protocol):
|
||||
shield_store: ShieldStore
|
||||
|
||||
@webmethod(route="/safety/run-shield", method="POST")
|
||||
@webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def run_shield(
|
||||
self,
|
||||
shield_id: str,
|
||||
|
@ -113,7 +114,7 @@ class Safety(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/moderations", method="POST")
|
||||
@webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
|
||||
"""Classifies if text and/or image inputs are potentially harmful.
|
||||
:param input: Input (or inputs) to classify.
|
||||
|
|
|
@ -9,6 +9,7 @@ from typing import Any, Protocol, runtime_checkable
|
|||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
# mapping of metric to value
|
||||
|
@ -61,7 +62,7 @@ class ScoringFunctionStore(Protocol):
|
|||
class Scoring(Protocol):
|
||||
scoring_function_store: ScoringFunctionStore
|
||||
|
||||
@webmethod(route="/scoring/score-batch", method="POST")
|
||||
@webmethod(route="/scoring/score-batch", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def score_batch(
|
||||
self,
|
||||
dataset_id: str,
|
||||
|
@ -77,7 +78,7 @@ class Scoring(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/scoring/score", method="POST")
|
||||
@webmethod(route="/scoring/score", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def score(
|
||||
self,
|
||||
input_rows: list[dict[str, Any]],
|
||||
|
|
|
@ -18,6 +18,7 @@ from pydantic import BaseModel, Field
|
|||
|
||||
from llama_stack.apis.common.type_system import ParamType
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
||||
|
@ -160,7 +161,7 @@ class ListScoringFunctionsResponse(BaseModel):
|
|||
|
||||
@runtime_checkable
|
||||
class ScoringFunctions(Protocol):
|
||||
@webmethod(route="/scoring-functions", method="GET")
|
||||
@webmethod(route="/scoring-functions", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_scoring_functions(self) -> ListScoringFunctionsResponse:
|
||||
"""List all scoring functions.
|
||||
|
||||
|
@ -168,7 +169,7 @@ class ScoringFunctions(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET")
|
||||
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn:
|
||||
"""Get a scoring function by its ID.
|
||||
|
||||
|
@ -177,7 +178,7 @@ class ScoringFunctions(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/scoring-functions", method="POST")
|
||||
@webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def register_scoring_function(
|
||||
self,
|
||||
scoring_fn_id: str,
|
||||
|
@ -198,7 +199,7 @@ class ScoringFunctions(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE")
|
||||
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
|
||||
"""Unregister a scoring function.
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
|
|||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
@ -49,7 +50,7 @@ class ListShieldsResponse(BaseModel):
|
|||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Shields(Protocol):
|
||||
@webmethod(route="/shields", method="GET")
|
||||
@webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_shields(self) -> ListShieldsResponse:
|
||||
"""List all shields.
|
||||
|
||||
|
@ -57,7 +58,7 @@ class Shields(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/shields/{identifier:path}", method="GET")
|
||||
@webmethod(route="/shields/{identifier:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_shield(self, identifier: str) -> Shield:
|
||||
"""Get a shield by its identifier.
|
||||
|
||||
|
@ -66,7 +67,7 @@ class Shields(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/shields", method="POST")
|
||||
@webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def register_shield(
|
||||
self,
|
||||
shield_id: str,
|
||||
|
@ -84,7 +85,7 @@ class Shields(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/shields/{identifier:path}", method="DELETE")
|
||||
@webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def unregister_shield(self, identifier: str) -> None:
|
||||
"""Unregister a shield.
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ from typing import Any, Protocol
|
|||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.inference import Message
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
|
@ -59,7 +60,7 @@ class SyntheticDataGenerationResponse(BaseModel):
|
|||
|
||||
|
||||
class SyntheticDataGeneration(Protocol):
|
||||
@webmethod(route="/synthetic-data-generation/generate")
|
||||
@webmethod(route="/synthetic-data-generation/generate", level=LLAMA_STACK_API_V1)
|
||||
def synthetic_data_generate(
|
||||
self,
|
||||
dialogs: list[Message],
|
||||
|
|
|
@ -16,6 +16,7 @@ from typing import (
|
|||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.models.llama.datatypes import Primitive
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
@ -412,7 +413,7 @@ class QueryMetricsResponse(BaseModel):
|
|||
|
||||
@runtime_checkable
|
||||
class Telemetry(Protocol):
|
||||
@webmethod(route="/telemetry/events", method="POST")
|
||||
@webmethod(route="/telemetry/events", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def log_event(
|
||||
self,
|
||||
event: Event,
|
||||
|
@ -425,7 +426,7 @@ class Telemetry(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE)
|
||||
@webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1)
|
||||
async def query_traces(
|
||||
self,
|
||||
attribute_filters: list[QueryCondition] | None = None,
|
||||
|
@ -443,7 +444,9 @@ class Telemetry(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/telemetry/traces/{trace_id:path}", method="GET", required_scope=REQUIRED_SCOPE)
|
||||
@webmethod(
|
||||
route="/telemetry/traces/{trace_id:path}", method="GET", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1
|
||||
)
|
||||
async def get_trace(self, trace_id: str) -> Trace:
|
||||
"""Get a trace by its ID.
|
||||
|
||||
|
@ -453,7 +456,10 @@ class Telemetry(Protocol):
|
|||
...
|
||||
|
||||
@webmethod(
|
||||
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}", method="GET", required_scope=REQUIRED_SCOPE
|
||||
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
|
||||
method="GET",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def get_span(self, trace_id: str, span_id: str) -> Span:
|
||||
"""Get a span by its ID.
|
||||
|
@ -464,7 +470,12 @@ class Telemetry(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/telemetry/spans/{span_id:path}/tree", method="POST", required_scope=REQUIRED_SCOPE)
|
||||
@webmethod(
|
||||
route="/telemetry/spans/{span_id:path}/tree",
|
||||
method="POST",
|
||||
required_scope=REQUIRED_SCOPE,
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def get_span_tree(
|
||||
self,
|
||||
span_id: str,
|
||||
|
@ -480,7 +491,7 @@ class Telemetry(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE)
|
||||
@webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1)
|
||||
async def query_spans(
|
||||
self,
|
||||
attribute_filters: list[QueryCondition],
|
||||
|
@ -496,7 +507,7 @@ class Telemetry(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/telemetry/spans/export", method="POST")
|
||||
@webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def save_spans_to_dataset(
|
||||
self,
|
||||
attribute_filters: list[QueryCondition],
|
||||
|
@ -513,7 +524,9 @@ class Telemetry(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/telemetry/metrics/{metric_name}", method="POST", required_scope=REQUIRED_SCOPE)
|
||||
@webmethod(
|
||||
route="/telemetry/metrics/{metric_name}", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1
|
||||
)
|
||||
async def query_metrics(
|
||||
self,
|
||||
metric_name: str,
|
||||
|
|
|
@ -11,6 +11,7 @@ from pydantic import BaseModel, Field, field_validator
|
|||
from typing_extensions import runtime_checkable
|
||||
|
||||
from llama_stack.apis.common.content_types import URL, InterleavedContent
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
@ -185,7 +186,7 @@ class RAGQueryConfig(BaseModel):
|
|||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class RAGToolRuntime(Protocol):
|
||||
@webmethod(route="/tool-runtime/rag-tool/insert", method="POST")
|
||||
@webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def insert(
|
||||
self,
|
||||
documents: list[RAGDocument],
|
||||
|
@ -200,7 +201,7 @@ class RAGToolRuntime(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/tool-runtime/rag-tool/query", method="POST")
|
||||
@webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def query(
|
||||
self,
|
||||
content: InterleavedContent,
|
||||
|
|
|
@ -12,6 +12,7 @@ from typing_extensions import runtime_checkable
|
|||
|
||||
from llama_stack.apis.common.content_types import URL, InterleavedContent
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
@ -151,7 +152,7 @@ class ListToolDefsResponse(BaseModel):
|
|||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class ToolGroups(Protocol):
|
||||
@webmethod(route="/toolgroups", method="POST")
|
||||
@webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def register_tool_group(
|
||||
self,
|
||||
toolgroup_id: str,
|
||||
|
@ -168,7 +169,7 @@ class ToolGroups(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET")
|
||||
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_tool_group(
|
||||
self,
|
||||
toolgroup_id: str,
|
||||
|
@ -180,7 +181,7 @@ class ToolGroups(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/toolgroups", method="GET")
|
||||
@webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_tool_groups(self) -> ListToolGroupsResponse:
|
||||
"""List tool groups with optional provider.
|
||||
|
||||
|
@ -188,7 +189,7 @@ class ToolGroups(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/tools", method="GET")
|
||||
@webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_tools(self, toolgroup_id: str | None = None) -> ListToolsResponse:
|
||||
"""List tools with optional tool group.
|
||||
|
||||
|
@ -197,7 +198,7 @@ class ToolGroups(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/tools/{tool_name:path}", method="GET")
|
||||
@webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_tool(
|
||||
self,
|
||||
tool_name: str,
|
||||
|
@ -209,7 +210,7 @@ class ToolGroups(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE")
|
||||
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def unregister_toolgroup(
|
||||
self,
|
||||
toolgroup_id: str,
|
||||
|
@ -238,7 +239,7 @@ class ToolRuntime(Protocol):
|
|||
rag_tool: RAGToolRuntime | None = None
|
||||
|
||||
# TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed.
|
||||
@webmethod(route="/tool-runtime/list-tools", method="GET")
|
||||
@webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_runtime_tools(
|
||||
self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
|
||||
) -> ListToolDefsResponse:
|
||||
|
@ -250,7 +251,7 @@ class ToolRuntime(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/tool-runtime/invoke", method="POST")
|
||||
@webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
|
||||
"""Run a tool with the given arguments.
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ from typing import Literal, Protocol, runtime_checkable
|
|||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
@ -65,7 +66,7 @@ class ListVectorDBsResponse(BaseModel):
|
|||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class VectorDBs(Protocol):
|
||||
@webmethod(route="/vector-dbs", method="GET")
|
||||
@webmethod(route="/vector-dbs", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_vector_dbs(self) -> ListVectorDBsResponse:
|
||||
"""List all vector databases.
|
||||
|
||||
|
@ -73,7 +74,7 @@ class VectorDBs(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET")
|
||||
@webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_vector_db(
|
||||
self,
|
||||
vector_db_id: str,
|
||||
|
@ -85,7 +86,7 @@ class VectorDBs(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/vector-dbs", method="POST")
|
||||
@webmethod(route="/vector-dbs", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def register_vector_db(
|
||||
self,
|
||||
vector_db_id: str,
|
||||
|
@ -107,7 +108,7 @@ class VectorDBs(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE")
|
||||
@webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def unregister_vector_db(self, vector_db_id: str) -> None:
|
||||
"""Unregister a vector database.
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@ from pydantic import BaseModel, Field
|
|||
|
||||
from llama_stack.apis.inference import InterleavedContent
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
@ -437,7 +438,7 @@ class VectorIO(Protocol):
|
|||
|
||||
# this will just block now until chunks are inserted, but it should
|
||||
# probably return a Job instance which can be polled for completion
|
||||
@webmethod(route="/vector-io/insert", method="POST")
|
||||
@webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def insert_chunks(
|
||||
self,
|
||||
vector_db_id: str,
|
||||
|
@ -455,7 +456,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/vector-io/query", method="POST")
|
||||
@webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def query_chunks(
|
||||
self,
|
||||
vector_db_id: str,
|
||||
|
@ -472,7 +473,7 @@ class VectorIO(Protocol):
|
|||
...
|
||||
|
||||
# OpenAI Vector Stores API endpoints
|
||||
@webmethod(route="/openai/v1/vector_stores", method="POST")
|
||||
@webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_create_vector_store(
|
||||
self,
|
||||
name: str | None = None,
|
||||
|
@ -498,7 +499,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores", method="GET")
|
||||
@webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_list_vector_stores(
|
||||
self,
|
||||
limit: int | None = 20,
|
||||
|
@ -516,7 +517,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET")
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_retrieve_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -528,7 +529,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST")
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_update_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -546,7 +547,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE")
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def openai_delete_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -558,7 +559,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST")
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_search_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -584,7 +585,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST")
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def openai_attach_file_to_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -602,7 +603,7 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET")
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def openai_list_files_in_vector_store(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -624,7 +625,9 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET")
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1
|
||||
)
|
||||
async def openai_retrieve_vector_store_file(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -638,7 +641,11 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content", method="GET")
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
|
||||
method="GET",
|
||||
level=LLAMA_STACK_API_V1,
|
||||
)
|
||||
async def openai_retrieve_vector_store_file_contents(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -652,7 +659,9 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST")
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1
|
||||
)
|
||||
async def openai_update_vector_store_file(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
@ -668,7 +677,9 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE")
|
||||
@webmethod(
|
||||
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1
|
||||
)
|
||||
async def openai_delete_vector_store_file(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
|
|
|
@ -4,4 +4,6 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
LLAMA_STACK_API_VERSION = "v1"
|
||||
LLAMA_STACK_API_V1 = "v1"
|
||||
LLAMA_STACK_API_V1BETA = "v1beta"
|
||||
LLAMA_STACK_API_V1ALPHA = "v1alpha"
|
||||
|
|
|
@ -15,7 +15,6 @@ import httpx
|
|||
from pydantic import BaseModel, parse_obj_as
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_VERSION
|
||||
from llama_stack.providers.datatypes import RemoteProviderConfig
|
||||
|
||||
_CLIENT_CLASSES = {}
|
||||
|
@ -114,7 +113,24 @@ def create_api_client_class(protocol) -> type:
|
|||
break
|
||||
kwargs[param.name] = args[i]
|
||||
|
||||
url = f"{self.base_url}/{LLAMA_STACK_API_VERSION}/{webmethod.route.lstrip('/')}"
|
||||
# Get all webmethods for this method (supports multiple decorators)
|
||||
webmethods = getattr(method, "__webmethods__", [])
|
||||
|
||||
if not webmethods:
|
||||
raise RuntimeError(f"Method {method} has no webmethod decorators")
|
||||
|
||||
# Choose the preferred webmethod (non-deprecated if available)
|
||||
preferred_webmethod = None
|
||||
for wm in webmethods:
|
||||
if not getattr(wm, "deprecated", False):
|
||||
preferred_webmethod = wm
|
||||
break
|
||||
|
||||
# If no non-deprecated found, use the first one
|
||||
if preferred_webmethod is None:
|
||||
preferred_webmethod = webmethods[0]
|
||||
|
||||
url = f"{self.base_url}/{preferred_webmethod.level}/{preferred_webmethod.route.lstrip('/')}"
|
||||
|
||||
def convert(value):
|
||||
if isinstance(value, list):
|
||||
|
|
|
@ -14,7 +14,6 @@ from starlette.routing import Route
|
|||
|
||||
from llama_stack.apis.datatypes import Api, ExternalApiSpec
|
||||
from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_VERSION
|
||||
from llama_stack.core.resolver import api_protocol_map
|
||||
from llama_stack.schema_utils import WebMethod
|
||||
|
||||
|
@ -54,22 +53,23 @@ def get_all_api_routes(
|
|||
protocol_methods.append((f"{tool_group.value}.{name}", method))
|
||||
|
||||
for name, method in protocol_methods:
|
||||
if not hasattr(method, "__webmethod__"):
|
||||
# Get all webmethods for this method (supports multiple decorators)
|
||||
webmethods = getattr(method, "__webmethods__", [])
|
||||
if not webmethods:
|
||||
continue
|
||||
|
||||
# The __webmethod__ attribute is dynamically added by the @webmethod decorator
|
||||
# mypy doesn't know about this dynamic attribute, so we ignore the attr-defined error
|
||||
webmethod = method.__webmethod__ # type: ignore[attr-defined]
|
||||
path = f"/{LLAMA_STACK_API_VERSION}/{webmethod.route.lstrip('/')}"
|
||||
if webmethod.method == hdrs.METH_GET:
|
||||
http_method = hdrs.METH_GET
|
||||
elif webmethod.method == hdrs.METH_DELETE:
|
||||
http_method = hdrs.METH_DELETE
|
||||
else:
|
||||
http_method = hdrs.METH_POST
|
||||
routes.append(
|
||||
(Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod)
|
||||
) # setting endpoint to None since don't use a Router object
|
||||
# Create routes for each webmethod decorator
|
||||
for webmethod in webmethods:
|
||||
path = f"/{webmethod.level}/{webmethod.route.lstrip('/')}"
|
||||
if webmethod.method == hdrs.METH_GET:
|
||||
http_method = hdrs.METH_GET
|
||||
elif webmethod.method == hdrs.METH_DELETE:
|
||||
http_method = hdrs.METH_DELETE
|
||||
else:
|
||||
http_method = hdrs.METH_POST
|
||||
routes.append(
|
||||
(Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod)
|
||||
) # setting endpoint to None since don't use a Router object
|
||||
|
||||
apis[api] = routes
|
||||
|
||||
|
|
|
@ -45,6 +45,14 @@ class TracingMiddleware:
|
|||
logger.debug(f"No matching route found for path: {path}, falling back to FastAPI")
|
||||
return await self.app(scope, receive, send)
|
||||
|
||||
# Log deprecation warning if route is deprecated
|
||||
if getattr(webmethod, "deprecated", False):
|
||||
logger.warning(
|
||||
f"DEPRECATED ROUTE USED: {scope.get('method', 'GET')} {path} - "
|
||||
f"This route is deprecated and may be removed in a future version. "
|
||||
f"Please check the docs for the supported version."
|
||||
)
|
||||
|
||||
trace_attributes = {"__location__": "server", "raw_path": path}
|
||||
|
||||
# Extract W3C trace context headers and store as trace attributes
|
||||
|
|
|
@ -13,6 +13,7 @@ from .strong_typing.schema import json_schema_type, register_schema # noqa: F40
|
|||
|
||||
@dataclass
|
||||
class WebMethod:
|
||||
level: str | None = None
|
||||
route: str | None = None
|
||||
public: bool = False
|
||||
request_examples: list[Any] | None = None
|
||||
|
@ -23,6 +24,7 @@ class WebMethod:
|
|||
descriptive_name: str | None = None
|
||||
experimental: bool | None = False
|
||||
required_scope: str | None = None
|
||||
deprecated: bool | None = False
|
||||
|
||||
|
||||
T = TypeVar("T", bound=Callable[..., Any])
|
||||
|
@ -31,6 +33,7 @@ T = TypeVar("T", bound=Callable[..., Any])
|
|||
def webmethod(
|
||||
route: str | None = None,
|
||||
method: str | None = None,
|
||||
level: str | None = None,
|
||||
public: bool | None = False,
|
||||
request_examples: list[Any] | None = None,
|
||||
response_examples: list[Any] | None = None,
|
||||
|
@ -38,6 +41,7 @@ def webmethod(
|
|||
descriptive_name: str | None = None,
|
||||
experimental: bool | None = False,
|
||||
required_scope: str | None = None,
|
||||
deprecated: bool | None = False,
|
||||
) -> Callable[[T], T]:
|
||||
"""
|
||||
Decorator that supplies additional metadata to an endpoint operation function.
|
||||
|
@ -51,9 +55,10 @@ def webmethod(
|
|||
"""
|
||||
|
||||
def wrap(func: T) -> T:
|
||||
func.__webmethod__ = WebMethod( # type: ignore
|
||||
webmethod_obj = WebMethod(
|
||||
route=route,
|
||||
method=method,
|
||||
level=level,
|
||||
public=public or False,
|
||||
request_examples=request_examples,
|
||||
response_examples=response_examples,
|
||||
|
@ -61,7 +66,16 @@ def webmethod(
|
|||
descriptive_name=descriptive_name,
|
||||
experimental=experimental,
|
||||
required_scope=required_scope,
|
||||
deprecated=deprecated,
|
||||
)
|
||||
|
||||
# Store all webmethods in a list to support multiple decorators
|
||||
if not hasattr(func, "__webmethods__"):
|
||||
func.__webmethods__ = [] # type: ignore
|
||||
func.__webmethods__.append(webmethod_obj) # type: ignore
|
||||
|
||||
# Keep the last one as __webmethod__ for backwards compatibility
|
||||
func.__webmethod__ = webmethod_obj # type: ignore
|
||||
return func
|
||||
|
||||
return wrap
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
from typing import Protocol
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.providers.datatypes import Api, ProviderSpec, RemoteProviderSpec
|
||||
from llama_stack.schema_utils import webmethod
|
||||
|
||||
|
@ -28,7 +29,7 @@ class WeatherProvider(Protocol):
|
|||
A protocol for the Weather API.
|
||||
"""
|
||||
|
||||
@webmethod(route="/weather/locations", method="GET")
|
||||
@webmethod(route="/weather/locations", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_available_locations() -> dict[str, list[str]]:
|
||||
"""
|
||||
Get the available locations.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue