mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-07 02:58:21 +00:00
Update OpenAPI generator to add param and field documentation
This commit is contained in:
parent
9f709387e2
commit
ebfa8ad4fb
7 changed files with 525 additions and 397 deletions
|
@ -36,6 +36,16 @@ from .pyopenapi.specification import Info, Server # noqa: E402
|
||||||
from .pyopenapi.utility import Specification # noqa: E402
|
from .pyopenapi.utility import Specification # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
def str_presenter(dumper, data):
    """Represent a string scalar, folding long or multi-line text.

    Strings that start with ``/`` followed by ``LLAMA_STACK_API_VERSION`` or
    with ``#/components/schemas/`` are always emitted with the default style.
    Any other string that contains a newline or is longer than 40 characters
    is emitted with the ``>`` style; everything else uses the default style.

    :param dumper: Representer object providing ``represent_scalar``.
    :param data: The string being serialized.
    :returns: Whatever ``dumper.represent_scalar`` returns for the string.
    """
    # Paths and schema references must stay on one line regardless of length.
    unfolded_prefixes = (f"/{LLAMA_STACK_API_VERSION}", "#/components/schemas/")

    if data.startswith(unfolded_prefixes):
        style = None
    elif "\n" in data or len(data) > 40:
        style = ">"
    else:
        style = None

    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style)
|
||||||
|
|
||||||
|
|
||||||
def main(output_dir: str):
|
def main(output_dir: str):
|
||||||
output_dir = Path(output_dir)
|
output_dir = Path(output_dir)
|
||||||
if not output_dir.exists():
|
if not output_dir.exists():
|
||||||
|
@ -69,7 +79,8 @@ def main(output_dir: str):
|
||||||
y.sequence_dash_offset = 2
|
y.sequence_dash_offset = 2
|
||||||
y.width = 80
|
y.width = 80
|
||||||
y.allow_unicode = True
|
y.allow_unicode = True
|
||||||
y.explicit_start = True
|
y.representer.add_representer(str, str_presenter)
|
||||||
|
|
||||||
y.dump(
|
y.dump(
|
||||||
spec.get_json(),
|
spec.get_json(),
|
||||||
fp,
|
fp,
|
||||||
|
|
|
@ -8,6 +8,7 @@ import collections
|
||||||
import hashlib
|
import hashlib
|
||||||
import ipaddress
|
import ipaddress
|
||||||
import typing
|
import typing
|
||||||
|
from dataclasses import field, make_dataclass
|
||||||
from typing import Any, Dict, Set, Union
|
from typing import Any, Dict, Set, Union
|
||||||
|
|
||||||
from ..strong_typing.core import JsonType
|
from ..strong_typing.core import JsonType
|
||||||
|
@ -276,6 +277,20 @@ class StatusResponse:
|
||||||
examples: List[Any] = dataclasses.field(default_factory=list)
|
examples: List[Any] = dataclasses.field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def create_docstring_for_request(
    request_name: str,
    fields: List[Union[Tuple[str, type], Tuple[str, type, Any]]],
    doc_params: Dict[str, str],
) -> str:
    """Create a ReST-style docstring for a dynamically generated request dataclass.

    :param request_name: Name of the generated dataclass. Not used when
        building the text; kept so the signature stays stable for callers.
    :param fields: Field specs as ``(name, type)`` or ``(name, type, field)``
        tuples. Only the name of each field is used.
    :param doc_params: Mapping from parameter name to its description. Fields
        without an entry are documented with an empty description.
    :returns: The docstring text: an empty short description followed by one
        ``:param <name>: <description>`` line per field.
    """
    # The leading "\n" entry leaves the short description empty, so the joined
    # text starts with a blank line before the first ":param" entry.
    lines = ["\n"]

    # Unpack only the name and ignore the rest, so both two- and three-element
    # field specs are accepted instead of raising ValueError on the latter.
    for name, *_ in fields:
        desc = doc_params.get(name, "")
        lines.append(f":param {name}: {desc}")

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
class ResponseBuilder:
|
class ResponseBuilder:
|
||||||
content_builder: ContentBuilder
|
content_builder: ContentBuilder
|
||||||
|
|
||||||
|
@ -493,11 +508,24 @@ class Generator:
|
||||||
first = next(iter(op.request_params))
|
first = next(iter(op.request_params))
|
||||||
request_name, request_type = first
|
request_name, request_type = first
|
||||||
|
|
||||||
from dataclasses import make_dataclass
|
|
||||||
|
|
||||||
op_name = "".join(word.capitalize() for word in op.name.split("_"))
|
op_name = "".join(word.capitalize() for word in op.name.split("_"))
|
||||||
request_name = f"{op_name}Request"
|
request_name = f"{op_name}Request"
|
||||||
request_type = make_dataclass(request_name, op.request_params)
|
fields = [
|
||||||
|
(
|
||||||
|
name,
|
||||||
|
type_,
|
||||||
|
)
|
||||||
|
for name, type_ in op.request_params
|
||||||
|
]
|
||||||
|
request_type = make_dataclass(
|
||||||
|
request_name,
|
||||||
|
fields,
|
||||||
|
namespace={
|
||||||
|
"__doc__": create_docstring_for_request(
|
||||||
|
request_name, fields, doc_params
|
||||||
|
)
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
requestBody = RequestBody(
|
requestBody = RequestBody(
|
||||||
content={
|
content={
|
||||||
|
|
|
@ -531,6 +531,7 @@ class JsonSchemaGenerator:
|
||||||
# add property docstring if available
|
# add property docstring if available
|
||||||
property_doc = property_docstrings.get(property_name)
|
property_doc = property_docstrings.get(property_name)
|
||||||
if property_doc:
|
if property_doc:
|
||||||
|
# print(output_name, property_doc)
|
||||||
property_def.pop("title", None)
|
property_def.pop("title", None)
|
||||||
property_def["description"] = property_doc
|
property_def["description"] = property_doc
|
||||||
|
|
||||||
|
|
|
@ -190,7 +190,7 @@
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"description": "Chat completion response. **OR** SSE-stream of these events.",
|
"description": "If stream=False, returns a ChatCompletionResponse with the full completion. If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk",
|
||||||
"content": {
|
"content": {
|
||||||
"text/event-stream": {
|
"text/event-stream": {
|
||||||
"schema": {
|
"schema": {
|
||||||
|
@ -210,6 +210,7 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inference"
|
"Inference"
|
||||||
],
|
],
|
||||||
|
"summary": "Generate a chat completion for the given messages using the specified model.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
@ -227,7 +228,7 @@
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"description": "Completion response. **OR** streamed completion response.",
|
"description": "If stream=False, returns a CompletionResponse with the full completion. If stream=True, returns an SSE event stream of CompletionResponseStreamChunk",
|
||||||
"content": {
|
"content": {
|
||||||
"text/event-stream": {
|
"text/event-stream": {
|
||||||
"schema": {
|
"schema": {
|
||||||
|
@ -247,6 +248,7 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inference"
|
"Inference"
|
||||||
],
|
],
|
||||||
|
"summary": "Generate a completion for the given content using the specified model.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
@ -485,7 +487,7 @@
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"description": "OK",
|
"description": "An array of embeddings, one for each content. Each embedding is a list of floats.",
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
|
@ -498,6 +500,7 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inference"
|
"Inference"
|
||||||
],
|
],
|
||||||
|
"summary": "Generate embeddings for content pieces using the specified model.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
@ -2372,6 +2375,46 @@
|
||||||
"tool_calls"
|
"tool_calls"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"GrammarResponseFormat": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "grammar",
|
||||||
|
"default": "grammar"
|
||||||
|
},
|
||||||
|
"bnf": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "array"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"type",
|
||||||
|
"bnf"
|
||||||
|
]
|
||||||
|
},
|
||||||
"GreedySamplingStrategy": {
|
"GreedySamplingStrategy": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -2447,6 +2490,46 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"JsonSchemaResponseFormat": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "json_schema",
|
||||||
|
"default": "json_schema"
|
||||||
|
},
|
||||||
|
"json_schema": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "array"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"type",
|
||||||
|
"json_schema"
|
||||||
|
]
|
||||||
|
},
|
||||||
"Message": {
|
"Message": {
|
||||||
"oneOf": [
|
"oneOf": [
|
||||||
{
|
{
|
||||||
|
@ -2472,6 +2555,23 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"ResponseFormat": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/JsonSchemaResponseFormat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/GrammarResponseFormat"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"discriminator": {
|
||||||
|
"propertyName": "type",
|
||||||
|
"mapping": {
|
||||||
|
"json_schema": "#/components/schemas/JsonSchemaResponseFormat",
|
||||||
|
"grammar": "#/components/schemas/GrammarResponseFormat"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"SamplingParams": {
|
"SamplingParams": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -2865,6 +2965,9 @@
|
||||||
"tool_prompt_format": {
|
"tool_prompt_format": {
|
||||||
"$ref": "#/components/schemas/ToolPromptFormat"
|
"$ref": "#/components/schemas/ToolPromptFormat"
|
||||||
},
|
},
|
||||||
|
"response_format": {
|
||||||
|
"$ref": "#/components/schemas/ResponseFormat"
|
||||||
|
},
|
||||||
"logprobs": {
|
"logprobs": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -2885,16 +2988,49 @@
|
||||||
"BatchChatCompletionResponse": {
|
"BatchChatCompletionResponse": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"completion_message_batch": {
|
"batch": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/CompletionMessage"
|
"$ref": "#/components/schemas/ChatCompletionResponse"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"completion_message_batch"
|
"batch"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"ChatCompletionResponse": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"completion_message": {
|
||||||
|
"$ref": "#/components/schemas/CompletionMessage"
|
||||||
|
},
|
||||||
|
"logprobs": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/TokenLogProbs"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"completion_message"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"TokenLogProbs": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"logprobs_by_token": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"logprobs_by_token"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"BatchCompletionRequest": {
|
"BatchCompletionRequest": {
|
||||||
|
@ -2912,6 +3048,9 @@
|
||||||
"sampling_params": {
|
"sampling_params": {
|
||||||
"$ref": "#/components/schemas/SamplingParams"
|
"$ref": "#/components/schemas/SamplingParams"
|
||||||
},
|
},
|
||||||
|
"response_format": {
|
||||||
|
"$ref": "#/components/schemas/ResponseFormat"
|
||||||
|
},
|
||||||
"logprobs": {
|
"logprobs": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -2932,18 +3071,41 @@
|
||||||
"BatchCompletionResponse": {
|
"BatchCompletionResponse": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"completion_message_batch": {
|
"batch": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/CompletionMessage"
|
"$ref": "#/components/schemas/CompletionResponse"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"completion_message_batch"
|
"batch"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"CompletionResponse": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"content": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"stop_reason": {
|
||||||
|
"$ref": "#/components/schemas/StopReason"
|
||||||
|
},
|
||||||
|
"logprobs": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/TokenLogProbs"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"content",
|
||||||
|
"stop_reason"
|
||||||
|
],
|
||||||
|
"title": "Completion response."
|
||||||
|
},
|
||||||
"CancelTrainingJobRequest": {
|
"CancelTrainingJobRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -2956,135 +3118,46 @@
|
||||||
"job_uuid"
|
"job_uuid"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"GrammarResponseFormat": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "grammar",
|
|
||||||
"default": "grammar"
|
|
||||||
},
|
|
||||||
"bnf": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"type": "null"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "array"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "object"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"type",
|
|
||||||
"bnf"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"JsonSchemaResponseFormat": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "json_schema",
|
|
||||||
"default": "json_schema"
|
|
||||||
},
|
|
||||||
"json_schema": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"type": "null"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "array"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "object"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"type",
|
|
||||||
"json_schema"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"ResponseFormat": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/JsonSchemaResponseFormat"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/GrammarResponseFormat"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"discriminator": {
|
|
||||||
"propertyName": "type",
|
|
||||||
"mapping": {
|
|
||||||
"json_schema": "#/components/schemas/JsonSchemaResponseFormat",
|
|
||||||
"grammar": "#/components/schemas/GrammarResponseFormat"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"ChatCompletionRequest": {
|
"ChatCompletionRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"model_id": {
|
"model_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The identifier of the model to use"
|
||||||
},
|
},
|
||||||
"messages": {
|
"messages": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/Message"
|
"$ref": "#/components/schemas/Message"
|
||||||
}
|
},
|
||||||
|
"description": "List of messages in the conversation"
|
||||||
},
|
},
|
||||||
"sampling_params": {
|
"sampling_params": {
|
||||||
"$ref": "#/components/schemas/SamplingParams"
|
"$ref": "#/components/schemas/SamplingParams",
|
||||||
|
"description": "Parameters to control the sampling strategy"
|
||||||
},
|
},
|
||||||
"tools": {
|
"tools": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/ToolDefinition"
|
"$ref": "#/components/schemas/ToolDefinition"
|
||||||
}
|
},
|
||||||
|
"description": "(Optional) List of tool definitions available to the model"
|
||||||
},
|
},
|
||||||
"tool_choice": {
|
"tool_choice": {
|
||||||
"$ref": "#/components/schemas/ToolChoice"
|
"$ref": "#/components/schemas/ToolChoice",
|
||||||
|
"description": "(Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto."
|
||||||
},
|
},
|
||||||
"tool_prompt_format": {
|
"tool_prompt_format": {
|
||||||
"$ref": "#/components/schemas/ToolPromptFormat"
|
"$ref": "#/components/schemas/ToolPromptFormat",
|
||||||
|
"description": "(Optional) Specifies how tool definitions are formatted when presenting to the model"
|
||||||
},
|
},
|
||||||
"response_format": {
|
"response_format": {
|
||||||
"$ref": "#/components/schemas/ResponseFormat"
|
"$ref": "#/components/schemas/ResponseFormat",
|
||||||
|
"description": "(Optional) Grammar specification for guided (structured) decoding"
|
||||||
},
|
},
|
||||||
"stream": {
|
"stream": {
|
||||||
"type": "boolean"
|
"type": "boolean",
|
||||||
|
"description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
|
||||||
},
|
},
|
||||||
"logprobs": {
|
"logprobs": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -3094,7 +3167,8 @@
|
||||||
"default": 0
|
"default": 0
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false
|
"additionalProperties": false,
|
||||||
|
"description": "(Optional) If specified, log probabilities for each token position will be returned."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -3103,25 +3177,6 @@
|
||||||
"messages"
|
"messages"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"ChatCompletionResponse": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"completion_message": {
|
|
||||||
"$ref": "#/components/schemas/CompletionMessage"
|
|
||||||
},
|
|
||||||
"logprobs": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"$ref": "#/components/schemas/TokenLogProbs"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"completion_message"
|
|
||||||
],
|
|
||||||
"title": "Chat completion response."
|
|
||||||
},
|
|
||||||
"ChatCompletionResponseEvent": {
|
"ChatCompletionResponseEvent": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -3166,8 +3221,7 @@
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"event"
|
"event"
|
||||||
],
|
]
|
||||||
"title": "SSE-stream of these events."
|
|
||||||
},
|
},
|
||||||
"ContentDelta": {
|
"ContentDelta": {
|
||||||
"oneOf": [
|
"oneOf": [
|
||||||
|
@ -3227,21 +3281,6 @@
|
||||||
"text"
|
"text"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"TokenLogProbs": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"logprobs_by_token": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"type": "number"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"logprobs_by_token"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"ToolCallDelta": {
|
"ToolCallDelta": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -3284,19 +3323,24 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"model_id": {
|
"model_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The identifier of the model to use"
|
||||||
},
|
},
|
||||||
"content": {
|
"content": {
|
||||||
"$ref": "#/components/schemas/InterleavedContent"
|
"$ref": "#/components/schemas/InterleavedContent",
|
||||||
|
"description": "The content to generate a completion for"
|
||||||
},
|
},
|
||||||
"sampling_params": {
|
"sampling_params": {
|
||||||
"$ref": "#/components/schemas/SamplingParams"
|
"$ref": "#/components/schemas/SamplingParams",
|
||||||
|
"description": "(Optional) Parameters to control the sampling strategy"
|
||||||
},
|
},
|
||||||
"response_format": {
|
"response_format": {
|
||||||
"$ref": "#/components/schemas/ResponseFormat"
|
"$ref": "#/components/schemas/ResponseFormat",
|
||||||
|
"description": "(Optional) Grammar specification for guided (structured) decoding"
|
||||||
},
|
},
|
||||||
"stream": {
|
"stream": {
|
||||||
"type": "boolean"
|
"type": "boolean",
|
||||||
|
"description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
|
||||||
},
|
},
|
||||||
"logprobs": {
|
"logprobs": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -3306,7 +3350,8 @@
|
||||||
"default": 0
|
"default": 0
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false
|
"additionalProperties": false,
|
||||||
|
"description": "(Optional) If specified, log probabilities for each token position will be returned."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -3315,29 +3360,6 @@
|
||||||
"content"
|
"content"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"CompletionResponse": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"content": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"stop_reason": {
|
|
||||||
"$ref": "#/components/schemas/StopReason"
|
|
||||||
},
|
|
||||||
"logprobs": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"$ref": "#/components/schemas/TokenLogProbs"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"content",
|
|
||||||
"stop_reason"
|
|
||||||
],
|
|
||||||
"title": "Completion response."
|
|
||||||
},
|
|
||||||
"CompletionResponseStreamChunk": {
|
"CompletionResponseStreamChunk": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -4241,13 +4263,15 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"model_id": {
|
"model_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The identifier of the model to use"
|
||||||
},
|
},
|
||||||
"contents": {
|
"contents": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/InterleavedContent"
|
"$ref": "#/components/schemas/InterleavedContent"
|
||||||
}
|
},
|
||||||
|
"description": "List of contents to generate embeddings for. Note that content can be multimodal."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -7863,7 +7887,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "ChatCompletionResponse",
|
"name": "ChatCompletionResponse",
|
||||||
"description": "Chat completion response."
|
"description": ""
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "ChatCompletionResponseEvent",
|
"name": "ChatCompletionResponseEvent",
|
||||||
|
@ -7875,7 +7899,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "ChatCompletionResponseStreamChunk",
|
"name": "ChatCompletionResponseStreamChunk",
|
||||||
"description": "SSE-stream of these events."
|
"description": ""
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Checkpoint",
|
"name": "Checkpoint",
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
---
|
|
||||||
openapi: 3.1.0
|
openapi: 3.1.0
|
||||||
info:
|
info:
|
||||||
title: Llama Stack Specification
|
title: Llama Stack Specification
|
||||||
version: v1
|
version: v1
|
||||||
description: "This is the specification of the Llama Stack that provides\n \
|
description: >-
|
||||||
\ a set of endpoints and their corresponding interfaces that are tailored
|
This is the specification of the Llama Stack that provides
|
||||||
to\n best leverage Llama Models."
|
a set of endpoints and their corresponding interfaces that are
|
||||||
|
tailored to
|
||||||
|
best leverage Llama Models.
|
||||||
servers:
|
servers:
|
||||||
- url: http://any-hosted-llama-stack.com
|
- url: http://any-hosted-llama-stack.com
|
||||||
paths:
|
paths:
|
||||||
|
@ -108,7 +109,9 @@ paths:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: Chat completion response. **OR** SSE-stream of these events.
|
description: >-
|
||||||
|
If stream=False, returns a ChatCompletionResponse with the full completion.
|
||||||
|
If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk
|
||||||
content:
|
content:
|
||||||
text/event-stream:
|
text/event-stream:
|
||||||
schema:
|
schema:
|
||||||
|
@ -117,6 +120,8 @@ paths:
|
||||||
- $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
|
- $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
|
||||||
tags:
|
tags:
|
||||||
- Inference
|
- Inference
|
||||||
|
summary: >-
|
||||||
|
Generate a chat completion for the given messages using the specified model.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
@ -128,7 +133,9 @@ paths:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: Completion response. **OR** streamed completion response.
|
description: >-
|
||||||
|
If stream=False, returns a CompletionResponse with the full completion.
|
||||||
|
If stream=True, returns an SSE event stream of CompletionResponseStreamChunk
|
||||||
content:
|
content:
|
||||||
text/event-stream:
|
text/event-stream:
|
||||||
schema:
|
schema:
|
||||||
|
@ -137,6 +144,8 @@ paths:
|
||||||
- $ref: '#/components/schemas/CompletionResponseStreamChunk'
|
- $ref: '#/components/schemas/CompletionResponseStreamChunk'
|
||||||
tags:
|
tags:
|
||||||
- Inference
|
- Inference
|
||||||
|
summary: >-
|
||||||
|
Generate a completion for the given content using the specified model.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
@ -189,8 +198,9 @@ paths:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: A single turn in an interaction with an Agentic System. **OR**
|
description: >-
|
||||||
streamed agent turn completion response.
|
A single turn in an interaction with an Agentic System. **OR** streamed
|
||||||
|
agent turn completion response.
|
||||||
content:
|
content:
|
||||||
text/event-stream:
|
text/event-stream:
|
||||||
schema:
|
schema:
|
||||||
|
@ -279,13 +289,17 @@ paths:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: OK
|
description: >-
|
||||||
|
An array of embeddings, one for each content. Each embedding is a list
|
||||||
|
of floats.
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/EmbeddingsResponse'
|
$ref: '#/components/schemas/EmbeddingsResponse'
|
||||||
tags:
|
tags:
|
||||||
- Inference
|
- Inference
|
||||||
|
summary: >-
|
||||||
|
Generate embeddings for content pieces using the specified model.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
@ -709,7 +723,8 @@ paths:
|
||||||
description: OK
|
description: OK
|
||||||
tags:
|
tags:
|
||||||
- ToolRuntime
|
- ToolRuntime
|
||||||
summary: Index documents so they can be used by the RAG system
|
summary: >-
|
||||||
|
Index documents so they can be used by the RAG system
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
@ -1109,7 +1124,8 @@ paths:
|
||||||
$ref: '#/components/schemas/RAGQueryResult'
|
$ref: '#/components/schemas/RAGQueryResult'
|
||||||
tags:
|
tags:
|
||||||
- ToolRuntime
|
- ToolRuntime
|
||||||
summary: Query the RAG system for context; typically invoked by the agent
|
summary: >-
|
||||||
|
Query the RAG system for context; typically invoked by the agent
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
@ -1341,7 +1357,8 @@ paths:
|
||||||
tags:
|
tags:
|
||||||
- Inspect
|
- Inspect
|
||||||
parameters: []
|
parameters: []
|
||||||
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
|
jsonSchemaDialect: >-
|
||||||
|
https://json-schema.org/draft/2020-12/schema
|
||||||
components:
|
components:
|
||||||
schemas:
|
schemas:
|
||||||
AppendRowsRequest:
|
AppendRowsRequest:
|
||||||
|
@ -1393,6 +1410,27 @@ components:
|
||||||
- content
|
- content
|
||||||
- stop_reason
|
- stop_reason
|
||||||
- tool_calls
|
- tool_calls
|
||||||
|
GrammarResponseFormat:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
const: grammar
|
||||||
|
default: grammar
|
||||||
|
bnf:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
- bnf
|
||||||
GreedySamplingStrategy:
|
GreedySamplingStrategy:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -1439,6 +1477,27 @@ components:
|
||||||
mapping:
|
mapping:
|
||||||
image: '#/components/schemas/ImageContentItem'
|
image: '#/components/schemas/ImageContentItem'
|
||||||
text: '#/components/schemas/TextContentItem'
|
text: '#/components/schemas/TextContentItem'
|
||||||
|
JsonSchemaResponseFormat:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
const: json_schema
|
||||||
|
default: json_schema
|
||||||
|
json_schema:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
- json_schema
|
||||||
Message:
|
Message:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/UserMessage'
|
- $ref: '#/components/schemas/UserMessage'
|
||||||
|
@ -1452,6 +1511,15 @@ components:
|
||||||
system: '#/components/schemas/SystemMessage'
|
system: '#/components/schemas/SystemMessage'
|
||||||
tool: '#/components/schemas/ToolResponseMessage'
|
tool: '#/components/schemas/ToolResponseMessage'
|
||||||
assistant: '#/components/schemas/CompletionMessage'
|
assistant: '#/components/schemas/CompletionMessage'
|
||||||
|
ResponseFormat:
|
||||||
|
oneOf:
|
||||||
|
- $ref: '#/components/schemas/JsonSchemaResponseFormat'
|
||||||
|
- $ref: '#/components/schemas/GrammarResponseFormat'
|
||||||
|
discriminator:
|
||||||
|
propertyName: type
|
||||||
|
mapping:
|
||||||
|
json_schema: '#/components/schemas/JsonSchemaResponseFormat'
|
||||||
|
grammar: '#/components/schemas/GrammarResponseFormat'
|
||||||
SamplingParams:
|
SamplingParams:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -1594,16 +1662,28 @@ components:
|
||||||
- json
|
- json
|
||||||
- function_tag
|
- function_tag
|
||||||
- python_list
|
- python_list
|
||||||
title: This Enum refers to the prompt format for calling custom / zero shot
|
title: >-
|
||||||
tools
|
This Enum refers to the prompt format for calling custom / zero shot tools
|
||||||
description: "`json` --\n Refers to the json format for calling tools.\n\
|
description: >-
|
||||||
\ The json format takes the form like\n {\n \"type\": \"function\"\
|
`json` --
|
||||||
,\n \"function\" : {\n \"name\": \"function_name\",\n \
|
Refers to the json format for calling tools.
|
||||||
\ \"description\": \"function_description\",\n \"parameters\"\
|
The json format takes the form like
|
||||||
: {...}\n }\n }\n\n`function_tag` --\n This is an example of
|
{
|
||||||
how you could define\n your own user defined format for making tool calls.\n\
|
"type": "function",
|
||||||
\ The function_tag format looks like this,\n <function=function_name>(parameters)</function>\n
|
"function" : {
|
||||||
\nThe detailed prompts for each of these formats are added to llama cli"
|
"name": "function_name",
|
||||||
|
"description": "function_description",
|
||||||
|
"parameters": {...}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
`function_tag` --
|
||||||
|
This is an example of how you could define
|
||||||
|
your own user defined format for making tool calls.
|
||||||
|
The function_tag format looks like this,
|
||||||
|
<function=function_name>(parameters)</function>
|
||||||
|
|
||||||
|
The detailed prompts for each of these formats are added to llama cli
|
||||||
ToolResponseMessage:
|
ToolResponseMessage:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -1697,6 +1777,8 @@ components:
|
||||||
$ref: '#/components/schemas/ToolChoice'
|
$ref: '#/components/schemas/ToolChoice'
|
||||||
tool_prompt_format:
|
tool_prompt_format:
|
||||||
$ref: '#/components/schemas/ToolPromptFormat'
|
$ref: '#/components/schemas/ToolPromptFormat'
|
||||||
|
response_format:
|
||||||
|
$ref: '#/components/schemas/ResponseFormat'
|
||||||
logprobs:
|
logprobs:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -1711,13 +1793,35 @@ components:
|
||||||
BatchChatCompletionResponse:
|
BatchChatCompletionResponse:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
completion_message_batch:
|
batch:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/CompletionMessage'
|
$ref: '#/components/schemas/ChatCompletionResponse'
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- completion_message_batch
|
- batch
|
||||||
|
ChatCompletionResponse:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
completion_message:
|
||||||
|
$ref: '#/components/schemas/CompletionMessage'
|
||||||
|
logprobs:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/TokenLogProbs'
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- completion_message
|
||||||
|
TokenLogProbs:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
logprobs_by_token:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
type: number
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- logprobs_by_token
|
||||||
BatchCompletionRequest:
|
BatchCompletionRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -1729,6 +1833,8 @@ components:
|
||||||
$ref: '#/components/schemas/InterleavedContent'
|
$ref: '#/components/schemas/InterleavedContent'
|
||||||
sampling_params:
|
sampling_params:
|
||||||
$ref: '#/components/schemas/SamplingParams'
|
$ref: '#/components/schemas/SamplingParams'
|
||||||
|
response_format:
|
||||||
|
$ref: '#/components/schemas/ResponseFormat'
|
||||||
logprobs:
|
logprobs:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -1743,13 +1849,29 @@ components:
|
||||||
BatchCompletionResponse:
|
BatchCompletionResponse:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
completion_message_batch:
|
batch:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/CompletionMessage'
|
$ref: '#/components/schemas/CompletionResponse'
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- completion_message_batch
|
- batch
|
||||||
|
CompletionResponse:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
content:
|
||||||
|
type: string
|
||||||
|
stop_reason:
|
||||||
|
$ref: '#/components/schemas/StopReason'
|
||||||
|
logprobs:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/TokenLogProbs'
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- content
|
||||||
|
- stop_reason
|
||||||
|
title: Completion response.
|
||||||
CancelTrainingJobRequest:
|
CancelTrainingJobRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -1758,80 +1880,45 @@ components:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- job_uuid
|
- job_uuid
|
||||||
GrammarResponseFormat:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
type: string
|
|
||||||
const: grammar
|
|
||||||
default: grammar
|
|
||||||
bnf:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- type
|
|
||||||
- bnf
|
|
||||||
JsonSchemaResponseFormat:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
type: string
|
|
||||||
const: json_schema
|
|
||||||
default: json_schema
|
|
||||||
json_schema:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- type
|
|
||||||
- json_schema
|
|
||||||
ResponseFormat:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/JsonSchemaResponseFormat'
|
|
||||||
- $ref: '#/components/schemas/GrammarResponseFormat'
|
|
||||||
discriminator:
|
|
||||||
propertyName: type
|
|
||||||
mapping:
|
|
||||||
json_schema: '#/components/schemas/JsonSchemaResponseFormat'
|
|
||||||
grammar: '#/components/schemas/GrammarResponseFormat'
|
|
||||||
ChatCompletionRequest:
|
ChatCompletionRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
model_id:
|
model_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: The identifier of the model to use
|
||||||
messages:
|
messages:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/Message'
|
$ref: '#/components/schemas/Message'
|
||||||
|
description: List of messages in the conversation
|
||||||
sampling_params:
|
sampling_params:
|
||||||
$ref: '#/components/schemas/SamplingParams'
|
$ref: '#/components/schemas/SamplingParams'
|
||||||
|
description: >-
|
||||||
|
Parameters to control the sampling strategy
|
||||||
tools:
|
tools:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/ToolDefinition'
|
$ref: '#/components/schemas/ToolDefinition'
|
||||||
|
description: >-
|
||||||
|
(Optional) List of tool definitions available to the model
|
||||||
tool_choice:
|
tool_choice:
|
||||||
$ref: '#/components/schemas/ToolChoice'
|
$ref: '#/components/schemas/ToolChoice'
|
||||||
|
description: >-
|
||||||
|
(Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto.
|
||||||
tool_prompt_format:
|
tool_prompt_format:
|
||||||
$ref: '#/components/schemas/ToolPromptFormat'
|
$ref: '#/components/schemas/ToolPromptFormat'
|
||||||
|
description: >-
|
||||||
|
(Optional) Specifies how tool definitions are formatted when presenting
|
||||||
|
to the model
|
||||||
response_format:
|
response_format:
|
||||||
$ref: '#/components/schemas/ResponseFormat'
|
$ref: '#/components/schemas/ResponseFormat'
|
||||||
|
description: >-
|
||||||
|
(Optional) Grammar specification for guided (structured) decoding
|
||||||
stream:
|
stream:
|
||||||
type: boolean
|
type: boolean
|
||||||
|
description: >-
|
||||||
|
(Optional) If True, generate an SSE event stream of the response. Defaults
|
||||||
|
to False.
|
||||||
logprobs:
|
logprobs:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -1839,23 +1926,13 @@ components:
|
||||||
type: integer
|
type: integer
|
||||||
default: 0
|
default: 0
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
|
description: >-
|
||||||
|
(Optional) If specified, log probabilities for each token position will
|
||||||
|
be returned.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- model_id
|
- model_id
|
||||||
- messages
|
- messages
|
||||||
ChatCompletionResponse:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
completion_message:
|
|
||||||
$ref: '#/components/schemas/CompletionMessage'
|
|
||||||
logprobs:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/TokenLogProbs'
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- completion_message
|
|
||||||
title: Chat completion response.
|
|
||||||
ChatCompletionResponseEvent:
|
ChatCompletionResponseEvent:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -1888,7 +1965,6 @@ components:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- event
|
- event
|
||||||
title: SSE-stream of these events.
|
|
||||||
ContentDelta:
|
ContentDelta:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/TextDelta'
|
- $ref: '#/components/schemas/TextDelta'
|
||||||
|
@ -1927,16 +2003,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
- text
|
- text
|
||||||
TokenLogProbs:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
logprobs_by_token:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
type: number
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- logprobs_by_token
|
|
||||||
ToolCallDelta:
|
ToolCallDelta:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -1967,14 +2033,23 @@ components:
|
||||||
properties:
|
properties:
|
||||||
model_id:
|
model_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: The identifier of the model to use
|
||||||
content:
|
content:
|
||||||
$ref: '#/components/schemas/InterleavedContent'
|
$ref: '#/components/schemas/InterleavedContent'
|
||||||
|
description: The content to generate a completion for
|
||||||
sampling_params:
|
sampling_params:
|
||||||
$ref: '#/components/schemas/SamplingParams'
|
$ref: '#/components/schemas/SamplingParams'
|
||||||
|
description: >-
|
||||||
|
(Optional) Parameters to control the sampling strategy
|
||||||
response_format:
|
response_format:
|
||||||
$ref: '#/components/schemas/ResponseFormat'
|
$ref: '#/components/schemas/ResponseFormat'
|
||||||
|
description: >-
|
||||||
|
(Optional) Grammar specification for guided (structured) decoding
|
||||||
stream:
|
stream:
|
||||||
type: boolean
|
type: boolean
|
||||||
|
description: >-
|
||||||
|
(Optional) If True, generate an SSE event stream of the response. Defaults
|
||||||
|
to False.
|
||||||
logprobs:
|
logprobs:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -1982,26 +2057,13 @@ components:
|
||||||
type: integer
|
type: integer
|
||||||
default: 0
|
default: 0
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
|
description: >-
|
||||||
|
(Optional) If specified, log probabilities for each token position will
|
||||||
|
be returned.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- model_id
|
- model_id
|
||||||
- content
|
- content
|
||||||
CompletionResponse:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
content:
|
|
||||||
type: string
|
|
||||||
stop_reason:
|
|
||||||
$ref: '#/components/schemas/StopReason'
|
|
||||||
logprobs:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/TokenLogProbs'
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- content
|
|
||||||
- stop_reason
|
|
||||||
title: Completion response.
|
|
||||||
CompletionResponseStreamChunk:
|
CompletionResponseStreamChunk:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -2558,7 +2620,8 @@ components:
|
||||||
- output_message
|
- output_message
|
||||||
- output_attachments
|
- output_attachments
|
||||||
- started_at
|
- started_at
|
||||||
title: A single turn in an interaction with an Agentic System.
|
title: >-
|
||||||
|
A single turn in an interaction with an Agentic System.
|
||||||
ViolationLevel:
|
ViolationLevel:
|
||||||
type: string
|
type: string
|
||||||
enum:
|
enum:
|
||||||
|
@ -2570,10 +2633,14 @@ components:
|
||||||
properties:
|
properties:
|
||||||
model_id:
|
model_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: The identifier of the model to use
|
||||||
contents:
|
contents:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/InterleavedContent'
|
$ref: '#/components/schemas/InterleavedContent'
|
||||||
|
description: >-
|
||||||
|
List of contents to generate embeddings for. Note that content can be
|
||||||
|
multimodal.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- model_id
|
- model_id
|
||||||
|
@ -2845,7 +2912,8 @@ components:
|
||||||
- session_name
|
- session_name
|
||||||
- turns
|
- turns
|
||||||
- started_at
|
- started_at
|
||||||
title: A single session of an interaction with an Agentic System.
|
title: >-
|
||||||
|
A single session of an interaction with an Agentic System.
|
||||||
AgentStepResponse:
|
AgentStepResponse:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -3194,7 +3262,8 @@ components:
|
||||||
- provider_resource_id
|
- provider_resource_id
|
||||||
- provider_id
|
- provider_id
|
||||||
- type
|
- type
|
||||||
title: A safety shield resource that can be used to check content
|
title: >-
|
||||||
|
A safety shield resource that can be used to check content
|
||||||
Span:
|
Span:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -4684,8 +4753,9 @@ components:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- synthetic_data
|
- synthetic_data
|
||||||
title: Response from the synthetic data generation. Batch of (prompt, response,
|
title: >-
|
||||||
score) tuples that pass the threshold.
|
Response from the synthetic data generation. Batch of (prompt, response, score)
|
||||||
|
tuples that pass the threshold.
|
||||||
VersionInfo:
|
VersionInfo:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -4763,13 +4833,13 @@ tags:
|
||||||
- name: ChatCompletionRequest
|
- name: ChatCompletionRequest
|
||||||
description: ''
|
description: ''
|
||||||
- name: ChatCompletionResponse
|
- name: ChatCompletionResponse
|
||||||
description: Chat completion response.
|
description: ''
|
||||||
- name: ChatCompletionResponseEvent
|
- name: ChatCompletionResponseEvent
|
||||||
description: Chat completion response event.
|
description: Chat completion response event.
|
||||||
- name: ChatCompletionResponseEventType
|
- name: ChatCompletionResponseEventType
|
||||||
description: ''
|
description: ''
|
||||||
- name: ChatCompletionResponseStreamChunk
|
- name: ChatCompletionResponseStreamChunk
|
||||||
description: SSE-stream of these events.
|
description: ''
|
||||||
- name: Checkpoint
|
- name: Checkpoint
|
||||||
description: Checkpoint created during training runs
|
description: Checkpoint created during training runs
|
||||||
- name: CompletionInputType
|
- name: CompletionInputType
|
||||||
|
@ -4998,9 +5068,11 @@ tags:
|
||||||
- name: ScoringResult
|
- name: ScoringResult
|
||||||
description: ''
|
description: ''
|
||||||
- name: Session
|
- name: Session
|
||||||
description: A single session of an interaction with an Agentic System.
|
description: >-
|
||||||
|
A single session of an interaction with an Agentic System.
|
||||||
- name: Shield
|
- name: Shield
|
||||||
description: A safety shield resource that can be used to check content
|
description: >-
|
||||||
|
A safety shield resource that can be used to check content
|
||||||
- name: ShieldCallStep
|
- name: ShieldCallStep
|
||||||
description: ''
|
description: ''
|
||||||
- name: Shields
|
- name: Shields
|
||||||
|
@ -5028,8 +5100,9 @@ tags:
|
||||||
description: ''
|
description: ''
|
||||||
- name: SyntheticDataGeneration (Coming Soon)
|
- name: SyntheticDataGeneration (Coming Soon)
|
||||||
- name: SyntheticDataGenerationResponse
|
- name: SyntheticDataGenerationResponse
|
||||||
description: Response from the synthetic data generation. Batch of (prompt, response,
|
description: >-
|
||||||
score) tuples that pass the threshold.
|
Response from the synthetic data generation. Batch of (prompt, response, score)
|
||||||
|
tuples that pass the threshold.
|
||||||
- name: SystemMessage
|
- name: SystemMessage
|
||||||
description: ''
|
description: ''
|
||||||
- name: Telemetry
|
- name: Telemetry
|
||||||
|
@ -5067,15 +5140,29 @@ tags:
|
||||||
- name: ToolParameter
|
- name: ToolParameter
|
||||||
description: ''
|
description: ''
|
||||||
- name: ToolPromptFormat
|
- name: ToolPromptFormat
|
||||||
description: "This Enum refers to the prompt format for calling custom / zero
|
description: >-
|
||||||
shot tools\n\n`json` --\n Refers to the json format for calling tools.\n\
|
This Enum refers to the prompt format for calling custom / zero shot tools
|
||||||
\ The json format takes the form like\n {\n \"type\": \"function\"\
|
|
||||||
,\n \"function\" : {\n \"name\": \"function_name\",\n \
|
|
||||||
\ \"description\": \"function_description\",\n \"parameters\"\
|
`json` --
|
||||||
: {...}\n }\n }\n\n`function_tag` --\n This is an example of how
|
Refers to the json format for calling tools.
|
||||||
you could define\n your own user defined format for making tool calls.\n\
|
The json format takes the form like
|
||||||
\ The function_tag format looks like this,\n <function=function_name>(parameters)</function>\n
|
{
|
||||||
\nThe detailed prompts for each of these formats are added to llama cli"
|
"type": "function",
|
||||||
|
"function" : {
|
||||||
|
"name": "function_name",
|
||||||
|
"description": "function_description",
|
||||||
|
"parameters": {...}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
`function_tag` --
|
||||||
|
This is an example of how you could define
|
||||||
|
your own user defined format for making tool calls.
|
||||||
|
The function_tag format looks like this,
|
||||||
|
<function=function_name>(parameters)</function>
|
||||||
|
|
||||||
|
The detailed prompts for each of these formats are added to llama cli
|
||||||
- name: ToolResponse
|
- name: ToolResponse
|
||||||
description: ''
|
description: ''
|
||||||
- name: ToolResponseMessage
|
- name: ToolResponseMessage
|
||||||
|
@ -5090,7 +5177,8 @@ tags:
|
||||||
- name: TrainingConfig
|
- name: TrainingConfig
|
||||||
description: ''
|
description: ''
|
||||||
- name: Turn
|
- name: Turn
|
||||||
description: A single turn in an interaction with an Agentic System.
|
description: >-
|
||||||
|
A single turn in an interaction with an Agentic System.
|
||||||
- name: URL
|
- name: URL
|
||||||
description: ''
|
description: ''
|
||||||
- name: UnionType
|
- name: UnionType
|
||||||
|
|
|
@ -7,13 +7,15 @@
|
||||||
from typing import List, Optional, Protocol, runtime_checkable
|
from typing import List, Optional, Protocol, runtime_checkable
|
||||||
|
|
||||||
from llama_models.schema_utils import json_schema_type, webmethod
|
from llama_models.schema_utils import json_schema_type, webmethod
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
CompletionMessage,
|
ChatCompletionResponse,
|
||||||
|
CompletionResponse,
|
||||||
InterleavedContent,
|
InterleavedContent,
|
||||||
LogProbConfig,
|
LogProbConfig,
|
||||||
Message,
|
Message,
|
||||||
|
ResponseFormat,
|
||||||
SamplingParams,
|
SamplingParams,
|
||||||
ToolChoice,
|
ToolChoice,
|
||||||
ToolDefinition,
|
ToolDefinition,
|
||||||
|
@ -21,35 +23,14 @@ from llama_stack.apis.inference import (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class BatchCompletionRequest(BaseModel):
|
|
||||||
model: str
|
|
||||||
content_batch: List[InterleavedContent]
|
|
||||||
sampling_params: Optional[SamplingParams] = SamplingParams()
|
|
||||||
logprobs: Optional[LogProbConfig] = None
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class BatchCompletionResponse(BaseModel):
|
class BatchCompletionResponse(BaseModel):
|
||||||
completion_message_batch: List[CompletionMessage]
|
batch: List[CompletionResponse]
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class BatchChatCompletionRequest(BaseModel):
|
|
||||||
model: str
|
|
||||||
messages_batch: List[List[Message]]
|
|
||||||
sampling_params: Optional[SamplingParams] = SamplingParams()
|
|
||||||
|
|
||||||
# zero-shot tool definitions as input to the model
|
|
||||||
tools: Optional[List[ToolDefinition]] = Field(default_factory=list)
|
|
||||||
tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
|
|
||||||
tool_prompt_format: Optional[ToolPromptFormat] = Field(default=None)
|
|
||||||
logprobs: Optional[LogProbConfig] = None
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class BatchChatCompletionResponse(BaseModel):
|
class BatchChatCompletionResponse(BaseModel):
|
||||||
completion_message_batch: List[CompletionMessage]
|
batch: List[ChatCompletionResponse]
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
|
@ -60,6 +41,7 @@ class BatchInference(Protocol):
|
||||||
model: str,
|
model: str,
|
||||||
content_batch: List[InterleavedContent],
|
content_batch: List[InterleavedContent],
|
||||||
sampling_params: Optional[SamplingParams] = SamplingParams(),
|
sampling_params: Optional[SamplingParams] = SamplingParams(),
|
||||||
|
response_format: Optional[ResponseFormat] = None,
|
||||||
logprobs: Optional[LogProbConfig] = None,
|
logprobs: Optional[LogProbConfig] = None,
|
||||||
) -> BatchCompletionResponse: ...
|
) -> BatchCompletionResponse: ...
|
||||||
|
|
||||||
|
@ -73,5 +55,6 @@ class BatchInference(Protocol):
|
||||||
tools: Optional[List[ToolDefinition]] = list,
|
tools: Optional[List[ToolDefinition]] = list,
|
||||||
tool_choice: Optional[ToolChoice] = ToolChoice.auto,
|
tool_choice: Optional[ToolChoice] = ToolChoice.auto,
|
||||||
tool_prompt_format: Optional[ToolPromptFormat] = None,
|
tool_prompt_format: Optional[ToolPromptFormat] = None,
|
||||||
|
response_format: Optional[ResponseFormat] = None,
|
||||||
logprobs: Optional[LogProbConfig] = None,
|
logprobs: Optional[LogProbConfig] = None,
|
||||||
) -> BatchChatCompletionResponse: ...
|
) -> BatchChatCompletionResponse: ...
|
||||||
|
|
|
@ -186,7 +186,6 @@ ResponseFormat = register_schema(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class CompletionRequest(BaseModel):
|
class CompletionRequest(BaseModel):
|
||||||
model: str
|
model: str
|
||||||
content: InterleavedContent
|
content: InterleavedContent
|
||||||
|
@ -215,23 +214,6 @@ class CompletionResponseStreamChunk(BaseModel):
|
||||||
logprobs: Optional[List[TokenLogProbs]] = None
|
logprobs: Optional[List[TokenLogProbs]] = None
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class BatchCompletionRequest(BaseModel):
|
|
||||||
model: str
|
|
||||||
content_batch: List[InterleavedContent]
|
|
||||||
sampling_params: Optional[SamplingParams] = SamplingParams()
|
|
||||||
response_format: Optional[ResponseFormat] = None
|
|
||||||
logprobs: Optional[LogProbConfig] = None
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class BatchCompletionResponse(BaseModel):
|
|
||||||
"""Batch completion response."""
|
|
||||||
|
|
||||||
batch: List[CompletionResponse]
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class ChatCompletionRequest(BaseModel):
|
class ChatCompletionRequest(BaseModel):
|
||||||
model: str
|
model: str
|
||||||
messages: List[Message]
|
messages: List[Message]
|
||||||
|
@ -249,37 +231,15 @@ class ChatCompletionRequest(BaseModel):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class ChatCompletionResponseStreamChunk(BaseModel):
|
class ChatCompletionResponseStreamChunk(BaseModel):
|
||||||
"""SSE-stream of these events."""
|
|
||||||
|
|
||||||
event: ChatCompletionResponseEvent
|
event: ChatCompletionResponseEvent
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class ChatCompletionResponse(BaseModel):
|
class ChatCompletionResponse(BaseModel):
|
||||||
"""Chat completion response."""
|
|
||||||
|
|
||||||
completion_message: CompletionMessage
|
completion_message: CompletionMessage
|
||||||
logprobs: Optional[List[TokenLogProbs]] = None
|
logprobs: Optional[List[TokenLogProbs]] = None
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class BatchChatCompletionRequest(BaseModel):
|
|
||||||
model: str
|
|
||||||
messages_batch: List[List[Message]]
|
|
||||||
sampling_params: Optional[SamplingParams] = SamplingParams()
|
|
||||||
|
|
||||||
# zero-shot tool definitions as input to the model
|
|
||||||
tools: Optional[List[ToolDefinition]] = Field(default_factory=list)
|
|
||||||
tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
|
|
||||||
tool_prompt_format: Optional[ToolPromptFormat] = Field(default=None)
|
|
||||||
logprobs: Optional[LogProbConfig] = None
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class BatchChatCompletionResponse(BaseModel):
|
|
||||||
batch: List[ChatCompletionResponse]
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class EmbeddingsResponse(BaseModel):
|
class EmbeddingsResponse(BaseModel):
|
||||||
embeddings: List[List[float]]
|
embeddings: List[List[float]]
|
||||||
|
@ -303,7 +263,19 @@ class Inference(Protocol):
|
||||||
response_format: Optional[ResponseFormat] = None,
|
response_format: Optional[ResponseFormat] = None,
|
||||||
stream: Optional[bool] = False,
|
stream: Optional[bool] = False,
|
||||||
logprobs: Optional[LogProbConfig] = None,
|
logprobs: Optional[LogProbConfig] = None,
|
||||||
) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]: ...
|
) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]:
|
||||||
|
"""Generate a completion for the given content using the specified model.
|
||||||
|
|
||||||
|
:param model_id: The identifier of the model to use
|
||||||
|
:param content: The content to generate a completion for
|
||||||
|
:param sampling_params: (Optional) Parameters to control the sampling strategy
|
||||||
|
:param response_format: (Optional) Grammar specification for guided (structured) decoding
|
||||||
|
:param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
|
||||||
|
:param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
|
||||||
|
:returns: If stream=False, returns a CompletionResponse with the full completion.
|
||||||
|
If stream=True, returns an SSE event stream of CompletionResponseStreamChunk
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
@webmethod(route="/inference/chat-completion", method="POST")
|
@webmethod(route="/inference/chat-completion", method="POST")
|
||||||
async def chat_completion(
|
async def chat_completion(
|
||||||
|
@ -311,7 +283,6 @@ class Inference(Protocol):
|
||||||
model_id: str,
|
model_id: str,
|
||||||
messages: List[Message],
|
messages: List[Message],
|
||||||
sampling_params: Optional[SamplingParams] = SamplingParams(),
|
sampling_params: Optional[SamplingParams] = SamplingParams(),
|
||||||
# zero-shot tool definitions as input to the model
|
|
||||||
tools: Optional[List[ToolDefinition]] = None,
|
tools: Optional[List[ToolDefinition]] = None,
|
||||||
tool_choice: Optional[ToolChoice] = ToolChoice.auto,
|
tool_choice: Optional[ToolChoice] = ToolChoice.auto,
|
||||||
tool_prompt_format: Optional[ToolPromptFormat] = None,
|
tool_prompt_format: Optional[ToolPromptFormat] = None,
|
||||||
|
@ -320,11 +291,33 @@ class Inference(Protocol):
|
||||||
logprobs: Optional[LogProbConfig] = None,
|
logprobs: Optional[LogProbConfig] = None,
|
||||||
) -> Union[
|
) -> Union[
|
||||||
ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]
|
ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]
|
||||||
]: ...
|
]:
|
||||||
|
"""Generate a chat completion for the given messages using the specified model.
|
||||||
|
|
||||||
|
:param model_id: The identifier of the model to use
|
||||||
|
:param messages: List of messages in the conversation
|
||||||
|
:param sampling_params: Parameters to control the sampling strategy
|
||||||
|
:param tools: (Optional) List of tool definitions available to the model
|
||||||
|
:param tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto.
|
||||||
|
:param tool_prompt_format: (Optional) Specifies how tool definitions are formatted when presenting to the model
|
||||||
|
:param response_format: (Optional) Grammar specification for guided (structured) decoding
|
||||||
|
:param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
|
||||||
|
:param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
|
||||||
|
:returns: If stream=False, returns a ChatCompletionResponse with the full completion.
|
||||||
|
If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
@webmethod(route="/inference/embeddings", method="POST")
|
@webmethod(route="/inference/embeddings", method="POST")
|
||||||
async def embeddings(
|
async def embeddings(
|
||||||
self,
|
self,
|
||||||
model_id: str,
|
model_id: str,
|
||||||
contents: List[InterleavedContent],
|
contents: List[InterleavedContent],
|
||||||
) -> EmbeddingsResponse: ...
|
) -> EmbeddingsResponse:
|
||||||
|
"""Generate embeddings for content pieces using the specified model.
|
||||||
|
|
||||||
|
:param model_id: The identifier of the model to use
|
||||||
|
:param contents: List of contents to generate embeddings for. Note that content can be multimodal.
|
||||||
|
:returns: An array of embeddings, one for each content. Each embedding is a list of floats.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue