mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-06 10:42:39 +00:00
Update OpenAPI generator to add param and field documentation
This commit is contained in:
parent
9f709387e2
commit
ebfa8ad4fb
7 changed files with 525 additions and 397 deletions
|
@ -36,6 +36,16 @@ from .pyopenapi.specification import Info, Server # noqa: E402
|
|||
from .pyopenapi.utility import Specification # noqa: E402
|
||||
|
||||
|
||||
def str_presenter(dumper, data):
    """Represent a ``str`` as a YAML scalar, choosing plain or folded style.

    Strings that start with the versioned API path prefix
    (``/{LLAMA_STACK_API_VERSION}``) or with ``#/components/schemas/`` are
    always emitted with the default style. Any other string is emitted in
    folded style (``>``) when it contains a newline or is longer than 40
    characters, and with the default style otherwise.

    :param dumper: representer object exposing ``represent_scalar``.
    :param data: the string being serialized.
    :return: whatever ``dumper.represent_scalar`` returns for the string.
    """
    # Route paths and schema references are never folded, whatever their length.
    keep_default = data.startswith(
        (f"/{LLAMA_STACK_API_VERSION}", "#/components/schemas/")
    )

    if not keep_default and ("\n" in data or len(data) > 40):
        style = ">"
    else:
        style = None

    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style)
|
||||
|
||||
|
||||
def main(output_dir: str):
|
||||
output_dir = Path(output_dir)
|
||||
if not output_dir.exists():
|
||||
|
@ -69,7 +79,8 @@ def main(output_dir: str):
|
|||
y.sequence_dash_offset = 2
|
||||
y.width = 80
|
||||
y.allow_unicode = True
|
||||
y.explicit_start = True
|
||||
y.representer.add_representer(str, str_presenter)
|
||||
|
||||
y.dump(
|
||||
spec.get_json(),
|
||||
fp,
|
||||
|
|
|
@ -8,6 +8,7 @@ import collections
|
|||
import hashlib
|
||||
import ipaddress
|
||||
import typing
|
||||
from dataclasses import field, make_dataclass
|
||||
from typing import Any, Dict, Set, Union
|
||||
|
||||
from ..strong_typing.core import JsonType
|
||||
|
@ -276,6 +277,20 @@ class StatusResponse:
|
|||
examples: List[Any] = dataclasses.field(default_factory=list)
|
||||
|
||||
|
||||
def create_docstring_for_request(
    request_name: str,
    fields: List[Union[Tuple[str, type], Tuple[str, type, Any]]],
    doc_params: Dict[str, str],
) -> str:
    """Create a ReST-style docstring for a dynamically generated request dataclass.

    The result contains one ``:param <name>: <description>`` line per field,
    in field order, preceded by an empty short-description slot.

    :param request_name: name of the generated request class (currently not
        used when building the text; kept for interface compatibility).
    :param fields: field specs in any form ``dataclasses.make_dataclass``
        accepts: a bare name, ``(name, type)`` or ``(name, type, default)``.
        Only the name is used here.
    :param doc_params: mapping from parameter name to its description;
        parameters without an entry get an empty description.
    :return: the assembled docstring text.
    """
    # The first entry stands in for the (intentionally blank) short description.
    lines = ["\n"]

    # Add parameter documentation in ReST format. Only the field name matters,
    # so accept every spec arity instead of unpacking exactly two items.
    for spec in fields:
        name = spec if isinstance(spec, str) else spec[0]
        lines.append(f":param {name}: {doc_params.get(name, '')}")

    return "\n".join(lines)
|
||||
|
||||
|
||||
class ResponseBuilder:
|
||||
content_builder: ContentBuilder
|
||||
|
||||
|
@ -493,11 +508,24 @@ class Generator:
|
|||
first = next(iter(op.request_params))
|
||||
request_name, request_type = first
|
||||
|
||||
from dataclasses import make_dataclass
|
||||
|
||||
op_name = "".join(word.capitalize() for word in op.name.split("_"))
|
||||
request_name = f"{op_name}Request"
|
||||
request_type = make_dataclass(request_name, op.request_params)
|
||||
fields = [
|
||||
(
|
||||
name,
|
||||
type_,
|
||||
)
|
||||
for name, type_ in op.request_params
|
||||
]
|
||||
request_type = make_dataclass(
|
||||
request_name,
|
||||
fields,
|
||||
namespace={
|
||||
"__doc__": create_docstring_for_request(
|
||||
request_name, fields, doc_params
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
requestBody = RequestBody(
|
||||
content={
|
||||
|
|
|
@ -531,6 +531,7 @@ class JsonSchemaGenerator:
|
|||
# add property docstring if available
|
||||
property_doc = property_docstrings.get(property_name)
|
||||
if property_doc:
|
||||
# print(output_name, property_doc)
|
||||
property_def.pop("title", None)
|
||||
property_def["description"] = property_doc
|
||||
|
||||
|
|
|
@ -190,7 +190,7 @@
|
|||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Chat completion response. **OR** SSE-stream of these events.",
|
||||
"description": "If stream=False, returns a ChatCompletionResponse with the full completion. If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk",
|
||||
"content": {
|
||||
"text/event-stream": {
|
||||
"schema": {
|
||||
|
@ -210,6 +210,7 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate a chat completion for the given messages using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
@ -227,7 +228,7 @@
|
|||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Completion response. **OR** streamed completion response.",
|
||||
"description": "If stream=False, returns a CompletionResponse with the full completion. If stream=True, returns an SSE event stream of CompletionResponseStreamChunk",
|
||||
"content": {
|
||||
"text/event-stream": {
|
||||
"schema": {
|
||||
|
@ -247,6 +248,7 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate a completion for the given content using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
@ -485,7 +487,7 @@
|
|||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"description": "An array of embeddings, one for each content. Each embedding is a list of floats.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
|
@ -498,6 +500,7 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate embeddings for content pieces using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
@ -2372,6 +2375,46 @@
|
|||
"tool_calls"
|
||||
]
|
||||
},
|
||||
"GrammarResponseFormat": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "grammar",
|
||||
"default": "grammar"
|
||||
},
|
||||
"bnf": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"bnf"
|
||||
]
|
||||
},
|
||||
"GreedySamplingStrategy": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -2447,6 +2490,46 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"JsonSchemaResponseFormat": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "json_schema",
|
||||
"default": "json_schema"
|
||||
},
|
||||
"json_schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"json_schema"
|
||||
]
|
||||
},
|
||||
"Message": {
|
||||
"oneOf": [
|
||||
{
|
||||
|
@ -2472,6 +2555,23 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"ResponseFormat": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/JsonSchemaResponseFormat"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/GrammarResponseFormat"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"json_schema": "#/components/schemas/JsonSchemaResponseFormat",
|
||||
"grammar": "#/components/schemas/GrammarResponseFormat"
|
||||
}
|
||||
}
|
||||
},
|
||||
"SamplingParams": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -2865,6 +2965,9 @@
|
|||
"tool_prompt_format": {
|
||||
"$ref": "#/components/schemas/ToolPromptFormat"
|
||||
},
|
||||
"response_format": {
|
||||
"$ref": "#/components/schemas/ResponseFormat"
|
||||
},
|
||||
"logprobs": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -2885,16 +2988,49 @@
|
|||
"BatchChatCompletionResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"completion_message_batch": {
|
||||
"batch": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/CompletionMessage"
|
||||
"$ref": "#/components/schemas/ChatCompletionResponse"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"completion_message_batch"
|
||||
"batch"
|
||||
]
|
||||
},
|
||||
"ChatCompletionResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"completion_message": {
|
||||
"$ref": "#/components/schemas/CompletionMessage"
|
||||
},
|
||||
"logprobs": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/TokenLogProbs"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"completion_message"
|
||||
]
|
||||
},
|
||||
"TokenLogProbs": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"logprobs_by_token": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"logprobs_by_token"
|
||||
]
|
||||
},
|
||||
"BatchCompletionRequest": {
|
||||
|
@ -2912,6 +3048,9 @@
|
|||
"sampling_params": {
|
||||
"$ref": "#/components/schemas/SamplingParams"
|
||||
},
|
||||
"response_format": {
|
||||
"$ref": "#/components/schemas/ResponseFormat"
|
||||
},
|
||||
"logprobs": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -2932,18 +3071,41 @@
|
|||
"BatchCompletionResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"completion_message_batch": {
|
||||
"batch": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/CompletionMessage"
|
||||
"$ref": "#/components/schemas/CompletionResponse"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"completion_message_batch"
|
||||
"batch"
|
||||
]
|
||||
},
|
||||
"CompletionResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string"
|
||||
},
|
||||
"stop_reason": {
|
||||
"$ref": "#/components/schemas/StopReason"
|
||||
},
|
||||
"logprobs": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/TokenLogProbs"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"content",
|
||||
"stop_reason"
|
||||
],
|
||||
"title": "Completion response."
|
||||
},
|
||||
"CancelTrainingJobRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -2956,135 +3118,46 @@
|
|||
"job_uuid"
|
||||
]
|
||||
},
|
||||
"GrammarResponseFormat": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "grammar",
|
||||
"default": "grammar"
|
||||
},
|
||||
"bnf": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"bnf"
|
||||
]
|
||||
},
|
||||
"JsonSchemaResponseFormat": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "json_schema",
|
||||
"default": "json_schema"
|
||||
},
|
||||
"json_schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"json_schema"
|
||||
]
|
||||
},
|
||||
"ResponseFormat": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/JsonSchemaResponseFormat"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/GrammarResponseFormat"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"json_schema": "#/components/schemas/JsonSchemaResponseFormat",
|
||||
"grammar": "#/components/schemas/GrammarResponseFormat"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ChatCompletionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model_id": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The identifier of the model to use"
|
||||
},
|
||||
"messages": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/Message"
|
||||
}
|
||||
},
|
||||
"description": "List of messages in the conversation"
|
||||
},
|
||||
"sampling_params": {
|
||||
"$ref": "#/components/schemas/SamplingParams"
|
||||
"$ref": "#/components/schemas/SamplingParams",
|
||||
"description": "Parameters to control the sampling strategy"
|
||||
},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/ToolDefinition"
|
||||
}
|
||||
},
|
||||
"description": "(Optional) List of tool definitions available to the model"
|
||||
},
|
||||
"tool_choice": {
|
||||
"$ref": "#/components/schemas/ToolChoice"
|
||||
"$ref": "#/components/schemas/ToolChoice",
|
||||
"description": "(Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto."
|
||||
},
|
||||
"tool_prompt_format": {
|
||||
"$ref": "#/components/schemas/ToolPromptFormat"
|
||||
"$ref": "#/components/schemas/ToolPromptFormat",
|
||||
"description": "(Optional) Specifies how tool definitions are formatted when presenting to the model"
|
||||
},
|
||||
"response_format": {
|
||||
"$ref": "#/components/schemas/ResponseFormat"
|
||||
"$ref": "#/components/schemas/ResponseFormat",
|
||||
"description": "(Optional) Grammar specification for guided (structured) decoding"
|
||||
},
|
||||
"stream": {
|
||||
"type": "boolean"
|
||||
"type": "boolean",
|
||||
"description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
|
||||
},
|
||||
"logprobs": {
|
||||
"type": "object",
|
||||
|
@ -3094,7 +3167,8 @@
|
|||
"default": 0
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
"additionalProperties": false,
|
||||
"description": "(Optional) If specified, log probabilities for each token position will be returned."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -3103,25 +3177,6 @@
|
|||
"messages"
|
||||
]
|
||||
},
|
||||
"ChatCompletionResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"completion_message": {
|
||||
"$ref": "#/components/schemas/CompletionMessage"
|
||||
},
|
||||
"logprobs": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/TokenLogProbs"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"completion_message"
|
||||
],
|
||||
"title": "Chat completion response."
|
||||
},
|
||||
"ChatCompletionResponseEvent": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -3166,8 +3221,7 @@
|
|||
"additionalProperties": false,
|
||||
"required": [
|
||||
"event"
|
||||
],
|
||||
"title": "SSE-stream of these events."
|
||||
]
|
||||
},
|
||||
"ContentDelta": {
|
||||
"oneOf": [
|
||||
|
@ -3227,21 +3281,6 @@
|
|||
"text"
|
||||
]
|
||||
},
|
||||
"TokenLogProbs": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"logprobs_by_token": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"logprobs_by_token"
|
||||
]
|
||||
},
|
||||
"ToolCallDelta": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -3284,19 +3323,24 @@
|
|||
"type": "object",
|
||||
"properties": {
|
||||
"model_id": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The identifier of the model to use"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/components/schemas/InterleavedContent"
|
||||
"$ref": "#/components/schemas/InterleavedContent",
|
||||
"description": "The content to generate a completion for"
|
||||
},
|
||||
"sampling_params": {
|
||||
"$ref": "#/components/schemas/SamplingParams"
|
||||
"$ref": "#/components/schemas/SamplingParams",
|
||||
"description": "(Optional) Parameters to control the sampling strategy"
|
||||
},
|
||||
"response_format": {
|
||||
"$ref": "#/components/schemas/ResponseFormat"
|
||||
"$ref": "#/components/schemas/ResponseFormat",
|
||||
"description": "(Optional) Grammar specification for guided (structured) decoding"
|
||||
},
|
||||
"stream": {
|
||||
"type": "boolean"
|
||||
"type": "boolean",
|
||||
"description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
|
||||
},
|
||||
"logprobs": {
|
||||
"type": "object",
|
||||
|
@ -3306,7 +3350,8 @@
|
|||
"default": 0
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
"additionalProperties": false,
|
||||
"description": "(Optional) If specified, log probabilities for each token position will be returned."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -3315,29 +3360,6 @@
|
|||
"content"
|
||||
]
|
||||
},
|
||||
"CompletionResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string"
|
||||
},
|
||||
"stop_reason": {
|
||||
"$ref": "#/components/schemas/StopReason"
|
||||
},
|
||||
"logprobs": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/TokenLogProbs"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"content",
|
||||
"stop_reason"
|
||||
],
|
||||
"title": "Completion response."
|
||||
},
|
||||
"CompletionResponseStreamChunk": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -4241,13 +4263,15 @@
|
|||
"type": "object",
|
||||
"properties": {
|
||||
"model_id": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The identifier of the model to use"
|
||||
},
|
||||
"contents": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/InterleavedContent"
|
||||
}
|
||||
},
|
||||
"description": "List of contents to generate embeddings for. Note that content can be multimodal."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -7863,7 +7887,7 @@
|
|||
},
|
||||
{
|
||||
"name": "ChatCompletionResponse",
|
||||
"description": "Chat completion response."
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"name": "ChatCompletionResponseEvent",
|
||||
|
@ -7875,7 +7899,7 @@
|
|||
},
|
||||
{
|
||||
"name": "ChatCompletionResponseStreamChunk",
|
||||
"description": "SSE-stream of these events."
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"name": "Checkpoint",
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
---
|
||||
openapi: 3.1.0
|
||||
info:
|
||||
title: Llama Stack Specification
|
||||
version: v1
|
||||
description: "This is the specification of the Llama Stack that provides\n \
|
||||
\ a set of endpoints and their corresponding interfaces that are tailored
|
||||
to\n best leverage Llama Models."
|
||||
description: >-
|
||||
This is the specification of the Llama Stack that provides
|
||||
a set of endpoints and their corresponding interfaces that are
|
||||
tailored to
|
||||
best leverage Llama Models.
|
||||
servers:
|
||||
- url: http://any-hosted-llama-stack.com
|
||||
paths:
|
||||
|
@ -108,7 +109,9 @@ paths:
|
|||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: Chat completion response. **OR** SSE-stream of these events.
|
||||
description: >-
|
||||
If stream=False, returns a ChatCompletionResponse with the full completion.
|
||||
If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk
|
||||
content:
|
||||
text/event-stream:
|
||||
schema:
|
||||
|
@ -117,6 +120,8 @@ paths:
|
|||
- $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate a chat completion for the given messages using the specified model.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
@ -128,7 +133,9 @@ paths:
|
|||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: Completion response. **OR** streamed completion response.
|
||||
description: >-
|
||||
If stream=False, returns a CompletionResponse with the full completion.
|
||||
If stream=True, returns an SSE event stream of CompletionResponseStreamChunk
|
||||
content:
|
||||
text/event-stream:
|
||||
schema:
|
||||
|
@ -137,6 +144,8 @@ paths:
|
|||
- $ref: '#/components/schemas/CompletionResponseStreamChunk'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate a completion for the given content using the specified model.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
@ -189,8 +198,9 @@ paths:
|
|||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A single turn in an interaction with an Agentic System. **OR**
|
||||
streamed agent turn completion response.
|
||||
description: >-
|
||||
A single turn in an interaction with an Agentic System. **OR** streamed
|
||||
agent turn completion response.
|
||||
content:
|
||||
text/event-stream:
|
||||
schema:
|
||||
|
@ -279,13 +289,17 @@ paths:
|
|||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
description: >-
|
||||
An array of embeddings, one for each content. Each embedding is a list
|
||||
of floats.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/EmbeddingsResponse'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate embeddings for content pieces using the specified model.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
@ -709,7 +723,8 @@ paths:
|
|||
description: OK
|
||||
tags:
|
||||
- ToolRuntime
|
||||
summary: Index documents so they can be used by the RAG system
|
||||
summary: >-
|
||||
Index documents so they can be used by the RAG system
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
@ -1109,7 +1124,8 @@ paths:
|
|||
$ref: '#/components/schemas/RAGQueryResult'
|
||||
tags:
|
||||
- ToolRuntime
|
||||
summary: Query the RAG system for context; typically invoked by the agent
|
||||
summary: >-
|
||||
Query the RAG system for context; typically invoked by the agent
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
@ -1341,7 +1357,8 @@ paths:
|
|||
tags:
|
||||
- Inspect
|
||||
parameters: []
|
||||
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
|
||||
jsonSchemaDialect: >-
|
||||
https://json-schema.org/draft/2020-12/schema
|
||||
components:
|
||||
schemas:
|
||||
AppendRowsRequest:
|
||||
|
@ -1393,6 +1410,27 @@ components:
|
|||
- content
|
||||
- stop_reason
|
||||
- tool_calls
|
||||
GrammarResponseFormat:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: grammar
|
||||
default: grammar
|
||||
bnf:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- bnf
|
||||
GreedySamplingStrategy:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -1439,6 +1477,27 @@ components:
|
|||
mapping:
|
||||
image: '#/components/schemas/ImageContentItem'
|
||||
text: '#/components/schemas/TextContentItem'
|
||||
JsonSchemaResponseFormat:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: json_schema
|
||||
default: json_schema
|
||||
json_schema:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- json_schema
|
||||
Message:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/UserMessage'
|
||||
|
@ -1452,6 +1511,15 @@ components:
|
|||
system: '#/components/schemas/SystemMessage'
|
||||
tool: '#/components/schemas/ToolResponseMessage'
|
||||
assistant: '#/components/schemas/CompletionMessage'
|
||||
ResponseFormat:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/JsonSchemaResponseFormat'
|
||||
- $ref: '#/components/schemas/GrammarResponseFormat'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
json_schema: '#/components/schemas/JsonSchemaResponseFormat'
|
||||
grammar: '#/components/schemas/GrammarResponseFormat'
|
||||
SamplingParams:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -1594,16 +1662,28 @@ components:
|
|||
- json
|
||||
- function_tag
|
||||
- python_list
|
||||
title: This Enum refers to the prompt format for calling custom / zero shot
|
||||
tools
|
||||
description: "`json` --\n Refers to the json format for calling tools.\n\
|
||||
\ The json format takes the form like\n {\n \"type\": \"function\"\
|
||||
,\n \"function\" : {\n \"name\": \"function_name\",\n \
|
||||
\ \"description\": \"function_description\",\n \"parameters\"\
|
||||
: {...}\n }\n }\n\n`function_tag` --\n This is an example of
|
||||
how you could define\n your own user defined format for making tool calls.\n\
|
||||
\ The function_tag format looks like this,\n <function=function_name>(parameters)</function>\n
|
||||
\nThe detailed prompts for each of these formats are added to llama cli"
|
||||
title: >-
|
||||
This Enum refers to the prompt format for calling custom / zero shot tools
|
||||
description: >-
|
||||
`json` --
|
||||
Refers to the json format for calling tools.
|
||||
The json format takes the form like
|
||||
{
|
||||
"type": "function",
|
||||
"function" : {
|
||||
"name": "function_name",
|
||||
"description": "function_description",
|
||||
"parameters": {...}
|
||||
}
|
||||
}
|
||||
|
||||
`function_tag` --
|
||||
This is an example of how you could define
|
||||
your own user defined format for making tool calls.
|
||||
The function_tag format looks like this,
|
||||
<function=function_name>(parameters)</function>
|
||||
|
||||
The detailed prompts for each of these formats are added to llama cli
|
||||
ToolResponseMessage:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -1697,6 +1777,8 @@ components:
|
|||
$ref: '#/components/schemas/ToolChoice'
|
||||
tool_prompt_format:
|
||||
$ref: '#/components/schemas/ToolPromptFormat'
|
||||
response_format:
|
||||
$ref: '#/components/schemas/ResponseFormat'
|
||||
logprobs:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -1711,13 +1793,35 @@ components:
|
|||
BatchChatCompletionResponse:
|
||||
type: object
|
||||
properties:
|
||||
completion_message_batch:
|
||||
batch:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/CompletionMessage'
|
||||
$ref: '#/components/schemas/ChatCompletionResponse'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- completion_message_batch
|
||||
- batch
|
||||
ChatCompletionResponse:
|
||||
type: object
|
||||
properties:
|
||||
completion_message:
|
||||
$ref: '#/components/schemas/CompletionMessage'
|
||||
logprobs:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/TokenLogProbs'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- completion_message
|
||||
TokenLogProbs:
|
||||
type: object
|
||||
properties:
|
||||
logprobs_by_token:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: number
|
||||
additionalProperties: false
|
||||
required:
|
||||
- logprobs_by_token
|
||||
BatchCompletionRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -1729,6 +1833,8 @@ components:
|
|||
$ref: '#/components/schemas/InterleavedContent'
|
||||
sampling_params:
|
||||
$ref: '#/components/schemas/SamplingParams'
|
||||
response_format:
|
||||
$ref: '#/components/schemas/ResponseFormat'
|
||||
logprobs:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -1743,13 +1849,29 @@ components:
|
|||
BatchCompletionResponse:
|
||||
type: object
|
||||
properties:
|
||||
completion_message_batch:
|
||||
batch:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/CompletionMessage'
|
||||
$ref: '#/components/schemas/CompletionResponse'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- completion_message_batch
|
||||
- batch
|
||||
CompletionResponse:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: string
|
||||
stop_reason:
|
||||
$ref: '#/components/schemas/StopReason'
|
||||
logprobs:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/TokenLogProbs'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- content
|
||||
- stop_reason
|
||||
title: Completion response.
|
||||
CancelTrainingJobRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -1758,80 +1880,45 @@ components:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- job_uuid
|
||||
GrammarResponseFormat:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: grammar
|
||||
default: grammar
|
||||
bnf:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- bnf
|
||||
JsonSchemaResponseFormat:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: json_schema
|
||||
default: json_schema
|
||||
json_schema:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- json_schema
|
||||
ResponseFormat:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/JsonSchemaResponseFormat'
|
||||
- $ref: '#/components/schemas/GrammarResponseFormat'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
json_schema: '#/components/schemas/JsonSchemaResponseFormat'
|
||||
grammar: '#/components/schemas/GrammarResponseFormat'
|
||||
ChatCompletionRequest:
|
||||
type: object
|
||||
properties:
|
||||
model_id:
|
||||
type: string
|
||||
description: The identifier of the model to use
|
||||
messages:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/Message'
|
||||
description: List of messages in the conversation
|
||||
sampling_params:
|
||||
$ref: '#/components/schemas/SamplingParams'
|
||||
description: >-
|
||||
Parameters to control the sampling strategy
|
||||
tools:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ToolDefinition'
|
||||
description: >-
|
||||
(Optional) List of tool definitions available to the model
|
||||
tool_choice:
|
||||
$ref: '#/components/schemas/ToolChoice'
|
||||
description: >-
|
||||
(Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto.
|
||||
tool_prompt_format:
|
||||
$ref: '#/components/schemas/ToolPromptFormat'
|
||||
description: >-
|
||||
(Optional) Specifies how tool definitions are formatted when presenting
|
||||
to the model
|
||||
response_format:
|
||||
$ref: '#/components/schemas/ResponseFormat'
|
||||
description: >-
|
||||
(Optional) Grammar specification for guided (structured) decoding
|
||||
stream:
|
||||
type: boolean
|
||||
description: >-
|
||||
(Optional) If True, generate an SSE event stream of the response. Defaults
|
||||
to False.
|
||||
logprobs:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -1839,23 +1926,13 @@ components:
|
|||
type: integer
|
||||
default: 0
|
||||
additionalProperties: false
|
||||
description: >-
|
||||
(Optional) If specified, log probabilities for each token position will
|
||||
be returned.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- model_id
|
||||
- messages
|
||||
ChatCompletionResponse:
|
||||
type: object
|
||||
properties:
|
||||
completion_message:
|
||||
$ref: '#/components/schemas/CompletionMessage'
|
||||
logprobs:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/TokenLogProbs'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- completion_message
|
||||
title: Chat completion response.
|
||||
ChatCompletionResponseEvent:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -1888,7 +1965,6 @@ components:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- event
|
||||
title: SSE-stream of these events.
|
||||
ContentDelta:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/TextDelta'
|
||||
|
@ -1927,16 +2003,6 @@ components:
|
|||
required:
|
||||
- type
|
||||
- text
|
||||
TokenLogProbs:
|
||||
type: object
|
||||
properties:
|
||||
logprobs_by_token:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: number
|
||||
additionalProperties: false
|
||||
required:
|
||||
- logprobs_by_token
|
||||
ToolCallDelta:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -1967,14 +2033,23 @@ components:
|
|||
properties:
|
||||
model_id:
|
||||
type: string
|
||||
description: The identifier of the model to use
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: The content to generate a completion for
|
||||
sampling_params:
|
||||
$ref: '#/components/schemas/SamplingParams'
|
||||
description: >-
|
||||
(Optional) Parameters to control the sampling strategy
|
||||
response_format:
|
||||
$ref: '#/components/schemas/ResponseFormat'
|
||||
description: >-
|
||||
(Optional) Grammar specification for guided (structured) decoding
|
||||
stream:
|
||||
type: boolean
|
||||
description: >-
|
||||
(Optional) If True, generate an SSE event stream of the response. Defaults
|
||||
to False.
|
||||
logprobs:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -1982,26 +2057,13 @@ components:
|
|||
type: integer
|
||||
default: 0
|
||||
additionalProperties: false
|
||||
description: >-
|
||||
(Optional) If specified, log probabilities for each token position will
|
||||
be returned.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- model_id
|
||||
- content
|
||||
CompletionResponse:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: string
|
||||
stop_reason:
|
||||
$ref: '#/components/schemas/StopReason'
|
||||
logprobs:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/TokenLogProbs'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- content
|
||||
- stop_reason
|
||||
title: Completion response.
|
||||
CompletionResponseStreamChunk:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -2558,7 +2620,8 @@ components:
|
|||
- output_message
|
||||
- output_attachments
|
||||
- started_at
|
||||
title: A single turn in an interaction with an Agentic System.
|
||||
title: >-
|
||||
A single turn in an interaction with an Agentic System.
|
||||
ViolationLevel:
|
||||
type: string
|
||||
enum:
|
||||
|
@ -2570,10 +2633,14 @@ components:
|
|||
properties:
|
||||
model_id:
|
||||
type: string
|
||||
description: The identifier of the model to use
|
||||
contents:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: >-
|
||||
List of contents to generate embeddings for. Note that content can be
|
||||
multimodal.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- model_id
|
||||
|
@ -2845,7 +2912,8 @@ components:
|
|||
- session_name
|
||||
- turns
|
||||
- started_at
|
||||
title: A single session of an interaction with an Agentic System.
|
||||
title: >-
|
||||
A single session of an interaction with an Agentic System.
|
||||
AgentStepResponse:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -3194,7 +3262,8 @@ components:
|
|||
- provider_resource_id
|
||||
- provider_id
|
||||
- type
|
||||
title: A safety shield resource that can be used to check content
|
||||
title: >-
|
||||
A safety shield resource that can be used to check content
|
||||
Span:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4684,8 +4753,9 @@ components:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- synthetic_data
|
||||
title: Response from the synthetic data generation. Batch of (prompt, response,
|
||||
score) tuples that pass the threshold.
|
||||
title: >-
|
||||
Response from the synthetic data generation. Batch of (prompt, response, score)
|
||||
tuples that pass the threshold.
|
||||
VersionInfo:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4763,13 +4833,13 @@ tags:
|
|||
- name: ChatCompletionRequest
|
||||
description: ''
|
||||
- name: ChatCompletionResponse
|
||||
description: Chat completion response.
|
||||
description: ''
|
||||
- name: ChatCompletionResponseEvent
|
||||
description: Chat completion response event.
|
||||
- name: ChatCompletionResponseEventType
|
||||
description: ''
|
||||
- name: ChatCompletionResponseStreamChunk
|
||||
description: SSE-stream of these events.
|
||||
description: ''
|
||||
- name: Checkpoint
|
||||
description: Checkpoint created during training runs
|
||||
- name: CompletionInputType
|
||||
|
@ -4998,9 +5068,11 @@ tags:
|
|||
- name: ScoringResult
|
||||
description: ''
|
||||
- name: Session
|
||||
description: A single session of an interaction with an Agentic System.
|
||||
description: >-
|
||||
A single session of an interaction with an Agentic System.
|
||||
- name: Shield
|
||||
description: A safety shield resource that can be used to check content
|
||||
description: >-
|
||||
A safety shield resource that can be used to check content
|
||||
- name: ShieldCallStep
|
||||
description: ''
|
||||
- name: Shields
|
||||
|
@ -5028,8 +5100,9 @@ tags:
|
|||
description: ''
|
||||
- name: SyntheticDataGeneration (Coming Soon)
|
||||
- name: SyntheticDataGenerationResponse
|
||||
description: Response from the synthetic data generation. Batch of (prompt, response,
|
||||
score) tuples that pass the threshold.
|
||||
description: >-
|
||||
Response from the synthetic data generation. Batch of (prompt, response, score)
|
||||
tuples that pass the threshold.
|
||||
- name: SystemMessage
|
||||
description: ''
|
||||
- name: Telemetry
|
||||
|
@ -5067,15 +5140,29 @@ tags:
|
|||
- name: ToolParameter
|
||||
description: ''
|
||||
- name: ToolPromptFormat
|
||||
description: "This Enum refers to the prompt format for calling custom / zero
|
||||
shot tools\n\n`json` --\n Refers to the json format for calling tools.\n\
|
||||
\ The json format takes the form like\n {\n \"type\": \"function\"\
|
||||
,\n \"function\" : {\n \"name\": \"function_name\",\n \
|
||||
\ \"description\": \"function_description\",\n \"parameters\"\
|
||||
: {...}\n }\n }\n\n`function_tag` --\n This is an example of how
|
||||
you could define\n your own user defined format for making tool calls.\n\
|
||||
\ The function_tag format looks like this,\n <function=function_name>(parameters)</function>\n
|
||||
\nThe detailed prompts for each of these formats are added to llama cli"
|
||||
description: >-
|
||||
This Enum refers to the prompt format for calling custom / zero shot tools
|
||||
|
||||
|
||||
`json` --
|
||||
Refers to the json format for calling tools.
|
||||
The json format takes the form like
|
||||
{
|
||||
"type": "function",
|
||||
"function" : {
|
||||
"name": "function_name",
|
||||
"description": "function_description",
|
||||
"parameters": {...}
|
||||
}
|
||||
}
|
||||
|
||||
`function_tag` --
|
||||
This is an example of how you could define
|
||||
your own user defined format for making tool calls.
|
||||
The function_tag format looks like this,
|
||||
<function=function_name>(parameters)</function>
|
||||
|
||||
The detailed prompts for each of these formats are added to llama cli
|
||||
- name: ToolResponse
|
||||
description: ''
|
||||
- name: ToolResponseMessage
|
||||
|
@ -5090,7 +5177,8 @@ tags:
|
|||
- name: TrainingConfig
|
||||
description: ''
|
||||
- name: Turn
|
||||
description: A single turn in an interaction with an Agentic System.
|
||||
description: >-
|
||||
A single turn in an interaction with an Agentic System.
|
||||
- name: URL
|
||||
description: ''
|
||||
- name: UnionType
|
||||
|
|
|
@ -7,13 +7,15 @@
|
|||
from typing import List, Optional, Protocol, runtime_checkable
|
||||
|
||||
from llama_models.schema_utils import json_schema_type, webmethod
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
CompletionMessage,
|
||||
ChatCompletionResponse,
|
||||
CompletionResponse,
|
||||
InterleavedContent,
|
||||
LogProbConfig,
|
||||
Message,
|
||||
ResponseFormat,
|
||||
SamplingParams,
|
||||
ToolChoice,
|
||||
ToolDefinition,
|
||||
|
@ -21,35 +23,14 @@ from llama_stack.apis.inference import (
|
|||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BatchCompletionRequest(BaseModel):
|
||||
model: str
|
||||
content_batch: List[InterleavedContent]
|
||||
sampling_params: Optional[SamplingParams] = SamplingParams()
|
||||
logprobs: Optional[LogProbConfig] = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BatchCompletionResponse(BaseModel):
|
||||
completion_message_batch: List[CompletionMessage]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BatchChatCompletionRequest(BaseModel):
|
||||
model: str
|
||||
messages_batch: List[List[Message]]
|
||||
sampling_params: Optional[SamplingParams] = SamplingParams()
|
||||
|
||||
# zero-shot tool definitions as input to the model
|
||||
tools: Optional[List[ToolDefinition]] = Field(default_factory=list)
|
||||
tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
|
||||
tool_prompt_format: Optional[ToolPromptFormat] = Field(default=None)
|
||||
logprobs: Optional[LogProbConfig] = None
|
||||
batch: List[CompletionResponse]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BatchChatCompletionResponse(BaseModel):
|
||||
completion_message_batch: List[CompletionMessage]
|
||||
batch: List[ChatCompletionResponse]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
|
@ -60,6 +41,7 @@ class BatchInference(Protocol):
|
|||
model: str,
|
||||
content_batch: List[InterleavedContent],
|
||||
sampling_params: Optional[SamplingParams] = SamplingParams(),
|
||||
response_format: Optional[ResponseFormat] = None,
|
||||
logprobs: Optional[LogProbConfig] = None,
|
||||
) -> BatchCompletionResponse: ...
|
||||
|
||||
|
@ -73,5 +55,6 @@ class BatchInference(Protocol):
|
|||
tools: Optional[List[ToolDefinition]] = list,
|
||||
tool_choice: Optional[ToolChoice] = ToolChoice.auto,
|
||||
tool_prompt_format: Optional[ToolPromptFormat] = None,
|
||||
response_format: Optional[ResponseFormat] = None,
|
||||
logprobs: Optional[LogProbConfig] = None,
|
||||
) -> BatchChatCompletionResponse: ...
|
||||
|
|
|
@ -186,7 +186,6 @@ ResponseFormat = register_schema(
|
|||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class CompletionRequest(BaseModel):
|
||||
model: str
|
||||
content: InterleavedContent
|
||||
|
@ -215,23 +214,6 @@ class CompletionResponseStreamChunk(BaseModel):
|
|||
logprobs: Optional[List[TokenLogProbs]] = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BatchCompletionRequest(BaseModel):
|
||||
model: str
|
||||
content_batch: List[InterleavedContent]
|
||||
sampling_params: Optional[SamplingParams] = SamplingParams()
|
||||
response_format: Optional[ResponseFormat] = None
|
||||
logprobs: Optional[LogProbConfig] = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BatchCompletionResponse(BaseModel):
|
||||
"""Batch completion response."""
|
||||
|
||||
batch: List[CompletionResponse]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ChatCompletionRequest(BaseModel):
|
||||
model: str
|
||||
messages: List[Message]
|
||||
|
@ -249,37 +231,15 @@ class ChatCompletionRequest(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class ChatCompletionResponseStreamChunk(BaseModel):
|
||||
"""SSE-stream of these events."""
|
||||
|
||||
event: ChatCompletionResponseEvent
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ChatCompletionResponse(BaseModel):
|
||||
"""Chat completion response."""
|
||||
|
||||
completion_message: CompletionMessage
|
||||
logprobs: Optional[List[TokenLogProbs]] = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BatchChatCompletionRequest(BaseModel):
|
||||
model: str
|
||||
messages_batch: List[List[Message]]
|
||||
sampling_params: Optional[SamplingParams] = SamplingParams()
|
||||
|
||||
# zero-shot tool definitions as input to the model
|
||||
tools: Optional[List[ToolDefinition]] = Field(default_factory=list)
|
||||
tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
|
||||
tool_prompt_format: Optional[ToolPromptFormat] = Field(default=None)
|
||||
logprobs: Optional[LogProbConfig] = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BatchChatCompletionResponse(BaseModel):
|
||||
batch: List[ChatCompletionResponse]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class EmbeddingsResponse(BaseModel):
|
||||
embeddings: List[List[float]]
|
||||
|
@ -303,7 +263,19 @@ class Inference(Protocol):
|
|||
response_format: Optional[ResponseFormat] = None,
|
||||
stream: Optional[bool] = False,
|
||||
logprobs: Optional[LogProbConfig] = None,
|
||||
) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]: ...
|
||||
) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]:
|
||||
"""Generate a completion for the given content using the specified model.
|
||||
|
||||
:param model_id: The identifier of the model to use
|
||||
:param content: The content to generate a completion for
|
||||
:param sampling_params: (Optional) Parameters to control the sampling strategy
|
||||
:param response_format: (Optional) Grammar specification for guided (structured) decoding
|
||||
:param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
|
||||
:param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
|
||||
:returns: If stream=False, returns a CompletionResponse with the full completion.
|
||||
If stream=True, returns an SSE event stream of CompletionResponseStreamChunk
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/inference/chat-completion", method="POST")
|
||||
async def chat_completion(
|
||||
|
@ -311,7 +283,6 @@ class Inference(Protocol):
|
|||
model_id: str,
|
||||
messages: List[Message],
|
||||
sampling_params: Optional[SamplingParams] = SamplingParams(),
|
||||
# zero-shot tool definitions as input to the model
|
||||
tools: Optional[List[ToolDefinition]] = None,
|
||||
tool_choice: Optional[ToolChoice] = ToolChoice.auto,
|
||||
tool_prompt_format: Optional[ToolPromptFormat] = None,
|
||||
|
@ -320,11 +291,33 @@ class Inference(Protocol):
|
|||
logprobs: Optional[LogProbConfig] = None,
|
||||
) -> Union[
|
||||
ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]
|
||||
]: ...
|
||||
]:
|
||||
"""Generate a chat completion for the given messages using the specified model.
|
||||
|
||||
:param model_id: The identifier of the model to use
|
||||
:param messages: List of messages in the conversation
|
||||
:param sampling_params: Parameters to control the sampling strategy
|
||||
:param tools: (Optional) List of tool definitions available to the model
|
||||
:param tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto.
|
||||
:param tool_prompt_format: (Optional) Specifies how tool definitions are formatted when presenting to the model
|
||||
:param response_format: (Optional) Grammar specification for guided (structured) decoding
|
||||
:param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
|
||||
:param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
|
||||
:returns: If stream=False, returns a ChatCompletionResponse with the full completion.
|
||||
If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/inference/embeddings", method="POST")
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: List[InterleavedContent],
|
||||
) -> EmbeddingsResponse: ...
|
||||
) -> EmbeddingsResponse:
|
||||
"""Generate embeddings for content pieces using the specified model.
|
||||
|
||||
:param model_id: The identifier of the model to use
|
||||
:param contents: List of contents to generate embeddings for. Note that content can be multimodal.
|
||||
:returns: An array of embeddings, one for each content. Each embedding is a list of floats.
|
||||
"""
|
||||
...
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue