Update OpenAPI generator to add param and field documentation

Ashwin Bharambe 2025-01-28 12:27:21 -08:00
parent 9f709387e2
commit ebfa8ad4fb
7 changed files with 525 additions and 397 deletions

View file

@ -36,6 +36,16 @@ from .pyopenapi.specification import Info, Server # noqa: E402
from .pyopenapi.utility import Specification # noqa: E402
def str_presenter(dumper, data):
    if data.startswith(f"/{LLAMA_STACK_API_VERSION}") or data.startswith(
        "#/components/schemas/"
    ):
        style = None
    else:
        style = ">" if "\n" in data or len(data) > 40 else None
    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style)


def main(output_dir: str):
    output_dir = Path(output_dir)
    if not output_dir.exists():
@ -69,7 +79,8 @@ def main(output_dir: str):
        y.sequence_dash_offset = 2
        y.width = 80
        y.allow_unicode = True
        y.explicit_start = True
        y.representer.add_representer(str, str_presenter)
        y.dump(
            spec.get_json(),
            fp,

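As a quick illustration of the presenter's effect, here is a minimal standalone sketch; it assumes only ruamel.yaml (already a generator dependency), and the sample mapping is made up:

import io

from ruamel.yaml import YAML

def str_presenter(dumper, data):
    # Fold long or multi-line strings into block scalars; short strings stay inline.
    style = ">" if "\n" in data or len(data) > 40 else None
    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style)

y = YAML()
y.width = 80
y.representer.add_representer(str, str_presenter)

long_text = (
    "Generate a chat completion for the given messages using the specified model."
)
buf = io.StringIO()
y.dump({"summary": long_text}, buf)
print(buf.getvalue())
# summary: >-
#   Generate a chat completion for the given messages using the specified model.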
View file

@ -8,6 +8,7 @@ import collections
import hashlib
import ipaddress
import typing
from dataclasses import field, make_dataclass
from typing import Any, Dict, Set, Union
from ..strong_typing.core import JsonType
@ -276,6 +277,20 @@ class StatusResponse:
    examples: List[Any] = dataclasses.field(default_factory=list)


def create_docstring_for_request(
    request_name: str, fields: List[Tuple[str, type]], doc_params: Dict[str, str]
) -> str:
    """Creates a ReST-style docstring for a dynamically generated request dataclass."""
    lines = ["\n"]  # Short description
    # Add parameter documentation in ReST format
    for name, type_ in fields:
        desc = doc_params.get(name, "")
        lines.append(f":param {name}: {desc}")
    return "\n".join(lines)
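Here `doc_params` is expected to map parameter names to their descriptions, parsed out of the endpoint's ReST docstring. A hand-rolled sketch of building such a mapping (illustrative only; the generator relies on its strong_typing docstring parser, not this regex):

import re
from typing import Dict

def parse_param_docs(docstring: str) -> Dict[str, str]:
    # Collect ":param name: description" pairs from a ReST docstring.
    return {
        m.group(1): m.group(2).strip()
        for m in re.finditer(r":param (\w+): (.*)", docstring or "")
    }

doc = """Generate a chat completion for the given messages using the specified model.

:param model_id: The identifier of the model to use
:param messages: List of messages in the conversation
"""
print(parse_param_docs(doc))
# {'model_id': 'The identifier of the model to use',
#  'messages': 'List of messages in the conversation'}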
class ResponseBuilder:
    content_builder: ContentBuilder
@ -493,11 +508,24 @@ class Generator:
first = next(iter(op.request_params))
request_name, request_type = first
from dataclasses import make_dataclass
op_name = "".join(word.capitalize() for word in op.name.split("_"))
request_name = f"{op_name}Request"
request_type = make_dataclass(request_name, op.request_params)
fields = [(name, type_) for name, type_ in op.request_params]
request_type = make_dataclass(
    request_name,
    fields,
    namespace={
        "__doc__": create_docstring_for_request(
            request_name, fields, doc_params
        )
    },
)
requestBody = RequestBody(
content={

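Putting the two pieces together, a self-contained sketch (field names and descriptions are illustrative, and the helper is inlined) of how the generator now builds a documented request dataclass:

from dataclasses import make_dataclass
from typing import Dict, List, Tuple

def create_docstring_for_request(
    request_name: str, fields: List[Tuple[str, type]], doc_params: Dict[str, str]
) -> str:
    # Same shape as the generator's helper: one ":param" line per field.
    lines = ["\n"]
    for name, _type in fields:
        lines.append(f":param {name}: {doc_params.get(name, '')}")
    return "\n".join(lines)

fields = [("model_id", str), ("stream", bool)]
doc_params = {"model_id": "The identifier of the model to use"}

ChatCompletionRequest = make_dataclass(
    "ChatCompletionRequest",
    fields,
    namespace={
        "__doc__": create_docstring_for_request(
            "ChatCompletionRequest", fields, doc_params
        )
    },
)
print(ChatCompletionRequest.__doc__)
# :param model_id: The identifier of the model to use
# :param stream: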
View file

@ -531,6 +531,7 @@ class JsonSchemaGenerator:
# add property docstring if available
property_doc = property_docstrings.get(property_name)
if property_doc:
    # print(output_name, property_doc)
    property_def.pop("title", None)
    property_def["description"] = property_doc

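An illustrative reduction of what this branch does to a single property definition (the dicts are made up; this is not the JsonSchemaGenerator internals):

property_def = {"type": "string", "title": "ModelId"}
property_doc = "The identifier of the model to use"

if property_doc:
    # Prefer the human-written docstring over the auto-generated title.
    property_def.pop("title", None)
    property_def["description"] = property_doc

print(property_def)
# {'type': 'string', 'description': 'The identifier of the model to use'}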
View file

@ -190,7 +190,7 @@
"post": {
"responses": {
"200": {
"description": "Chat completion response. **OR** SSE-stream of these events.",
"description": "If stream=False, returns a ChatCompletionResponse with the full completion. If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk",
"content": {
"text/event-stream": {
"schema": {
@ -210,6 +210,7 @@
"tags": [
"Inference"
],
"summary": "Generate a chat completion for the given messages using the specified model.",
"parameters": [],
"requestBody": {
"content": {
@ -227,7 +228,7 @@
"post": {
"responses": {
"200": {
"description": "Completion response. **OR** streamed completion response.",
"description": "If stream=False, returns a CompletionResponse with the full completion. If stream=True, returns an SSE event stream of CompletionResponseStreamChunk",
"content": {
"text/event-stream": {
"schema": {
@ -247,6 +248,7 @@
"tags": [
"Inference"
],
"summary": "Generate a completion for the given content using the specified model.",
"parameters": [],
"requestBody": {
"content": {
@ -485,7 +487,7 @@
"post": {
"responses": {
"200": {
"description": "OK",
"description": "An array of embeddings, one for each content. Each embedding is a list of floats.",
"content": {
"application/json": {
"schema": {
@ -498,6 +500,7 @@
"tags": [
"Inference"
],
"summary": "Generate embeddings for content pieces using the specified model.",
"parameters": [],
"requestBody": {
"content": {
@ -2372,6 +2375,46 @@
"tool_calls"
]
},
"GrammarResponseFormat": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "grammar",
"default": "grammar"
},
"bnf": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"type",
"bnf"
]
},
"GreedySamplingStrategy": {
"type": "object",
"properties": {
@ -2447,6 +2490,46 @@
}
}
},
"JsonSchemaResponseFormat": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "json_schema",
"default": "json_schema"
},
"json_schema": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"type",
"json_schema"
]
},
"Message": {
"oneOf": [
{
@ -2472,6 +2555,23 @@
}
}
},
"ResponseFormat": {
"oneOf": [
{
"$ref": "#/components/schemas/JsonSchemaResponseFormat"
},
{
"$ref": "#/components/schemas/GrammarResponseFormat"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"json_schema": "#/components/schemas/JsonSchemaResponseFormat",
"grammar": "#/components/schemas/GrammarResponseFormat"
}
}
},
"SamplingParams": {
"type": "object",
"properties": {
@ -2865,6 +2965,9 @@
"tool_prompt_format": {
"$ref": "#/components/schemas/ToolPromptFormat"
},
"response_format": {
"$ref": "#/components/schemas/ResponseFormat"
},
"logprobs": {
"type": "object",
"properties": {
@ -2885,16 +2988,49 @@
"BatchChatCompletionResponse": {
"type": "object",
"properties": {
"completion_message_batch": {
"batch": {
"type": "array",
"items": {
"$ref": "#/components/schemas/CompletionMessage"
"$ref": "#/components/schemas/ChatCompletionResponse"
}
}
},
"additionalProperties": false,
"required": [
"completion_message_batch"
"batch"
]
},
"ChatCompletionResponse": {
"type": "object",
"properties": {
"completion_message": {
"$ref": "#/components/schemas/CompletionMessage"
},
"logprobs": {
"type": "array",
"items": {
"$ref": "#/components/schemas/TokenLogProbs"
}
}
},
"additionalProperties": false,
"required": [
"completion_message"
]
},
"TokenLogProbs": {
"type": "object",
"properties": {
"logprobs_by_token": {
"type": "object",
"additionalProperties": {
"type": "number"
}
}
},
"additionalProperties": false,
"required": [
"logprobs_by_token"
]
},
"BatchCompletionRequest": {
@ -2912,6 +3048,9 @@
"sampling_params": {
"$ref": "#/components/schemas/SamplingParams"
},
"response_format": {
"$ref": "#/components/schemas/ResponseFormat"
},
"logprobs": {
"type": "object",
"properties": {
@ -2932,18 +3071,41 @@
"BatchCompletionResponse": {
"type": "object",
"properties": {
"completion_message_batch": {
"batch": {
"type": "array",
"items": {
"$ref": "#/components/schemas/CompletionMessage"
"$ref": "#/components/schemas/CompletionResponse"
}
}
},
"additionalProperties": false,
"required": [
"completion_message_batch"
"batch"
]
},
"CompletionResponse": {
"type": "object",
"properties": {
"content": {
"type": "string"
},
"stop_reason": {
"$ref": "#/components/schemas/StopReason"
},
"logprobs": {
"type": "array",
"items": {
"$ref": "#/components/schemas/TokenLogProbs"
}
}
},
"additionalProperties": false,
"required": [
"content",
"stop_reason"
],
"title": "Completion response."
},
"CancelTrainingJobRequest": {
"type": "object",
"properties": {
@ -2956,135 +3118,46 @@
"job_uuid"
]
},
"GrammarResponseFormat": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "grammar",
"default": "grammar"
},
"bnf": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"type",
"bnf"
]
},
"JsonSchemaResponseFormat": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "json_schema",
"default": "json_schema"
},
"json_schema": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"type",
"json_schema"
]
},
"ResponseFormat": {
"oneOf": [
{
"$ref": "#/components/schemas/JsonSchemaResponseFormat"
},
{
"$ref": "#/components/schemas/GrammarResponseFormat"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"json_schema": "#/components/schemas/JsonSchemaResponseFormat",
"grammar": "#/components/schemas/GrammarResponseFormat"
}
}
},
"ChatCompletionRequest": {
"type": "object",
"properties": {
"model_id": {
"type": "string"
"type": "string",
"description": "The identifier of the model to use"
},
"messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
},
"description": "List of messages in the conversation"
},
"sampling_params": {
"$ref": "#/components/schemas/SamplingParams"
"$ref": "#/components/schemas/SamplingParams",
"description": "Parameters to control the sampling strategy"
},
"tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolDefinition"
}
},
"description": "(Optional) List of tool definitions available to the model"
},
"tool_choice": {
"$ref": "#/components/schemas/ToolChoice"
"$ref": "#/components/schemas/ToolChoice",
"description": "(Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto."
},
"tool_prompt_format": {
"$ref": "#/components/schemas/ToolPromptFormat"
"$ref": "#/components/schemas/ToolPromptFormat",
"description": "(Optional) Specifies how tool definitions are formatted when presenting to the model"
},
"response_format": {
"$ref": "#/components/schemas/ResponseFormat"
"$ref": "#/components/schemas/ResponseFormat",
"description": "(Optional) Grammar specification for guided (structured) decoding"
},
"stream": {
"type": "boolean"
"type": "boolean",
"description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
},
"logprobs": {
"type": "object",
@ -3094,7 +3167,8 @@
"default": 0
}
},
"additionalProperties": false
"additionalProperties": false,
"description": "(Optional) If specified, log probabilities for each token position will be returned."
}
},
"additionalProperties": false,
@ -3103,25 +3177,6 @@
"messages"
]
},
"ChatCompletionResponse": {
"type": "object",
"properties": {
"completion_message": {
"$ref": "#/components/schemas/CompletionMessage"
},
"logprobs": {
"type": "array",
"items": {
"$ref": "#/components/schemas/TokenLogProbs"
}
}
},
"additionalProperties": false,
"required": [
"completion_message"
],
"title": "Chat completion response."
},
"ChatCompletionResponseEvent": {
"type": "object",
"properties": {
@ -3166,8 +3221,7 @@
"additionalProperties": false,
"required": [
"event"
],
"title": "SSE-stream of these events."
]
},
"ContentDelta": {
"oneOf": [
@ -3227,21 +3281,6 @@
"text"
]
},
"TokenLogProbs": {
"type": "object",
"properties": {
"logprobs_by_token": {
"type": "object",
"additionalProperties": {
"type": "number"
}
}
},
"additionalProperties": false,
"required": [
"logprobs_by_token"
]
},
"ToolCallDelta": {
"type": "object",
"properties": {
@ -3284,19 +3323,24 @@
"type": "object",
"properties": {
"model_id": {
"type": "string"
"type": "string",
"description": "The identifier of the model to use"
},
"content": {
"$ref": "#/components/schemas/InterleavedContent"
"$ref": "#/components/schemas/InterleavedContent",
"description": "The content to generate a completion for"
},
"sampling_params": {
"$ref": "#/components/schemas/SamplingParams"
"$ref": "#/components/schemas/SamplingParams",
"description": "(Optional) Parameters to control the sampling strategy"
},
"response_format": {
"$ref": "#/components/schemas/ResponseFormat"
"$ref": "#/components/schemas/ResponseFormat",
"description": "(Optional) Grammar specification for guided (structured) decoding"
},
"stream": {
"type": "boolean"
"type": "boolean",
"description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
},
"logprobs": {
"type": "object",
@ -3306,7 +3350,8 @@
"default": 0
}
},
"additionalProperties": false
"additionalProperties": false,
"description": "(Optional) If specified, log probabilities for each token position will be returned."
}
},
"additionalProperties": false,
@ -3315,29 +3360,6 @@
"content"
]
},
"CompletionResponse": {
"type": "object",
"properties": {
"content": {
"type": "string"
},
"stop_reason": {
"$ref": "#/components/schemas/StopReason"
},
"logprobs": {
"type": "array",
"items": {
"$ref": "#/components/schemas/TokenLogProbs"
}
}
},
"additionalProperties": false,
"required": [
"content",
"stop_reason"
],
"title": "Completion response."
},
"CompletionResponseStreamChunk": {
"type": "object",
"properties": {
@ -4241,13 +4263,15 @@
"type": "object",
"properties": {
"model_id": {
"type": "string"
"type": "string",
"description": "The identifier of the model to use"
},
"contents": {
"type": "array",
"items": {
"$ref": "#/components/schemas/InterleavedContent"
}
},
"description": "List of contents to generate embeddings for. Note that content can be multimodal."
}
},
"additionalProperties": false,
@ -7863,7 +7887,7 @@
},
{
"name": "ChatCompletionResponse",
"description": "Chat completion response."
"description": ""
},
{
"name": "ChatCompletionResponseEvent",
@ -7875,7 +7899,7 @@
},
{
"name": "ChatCompletionResponseStreamChunk",
"description": "SSE-stream of these events."
"description": ""
},
{
"name": "Checkpoint",

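For reference, here are hedged example payloads (the schema and BNF contents are made up) matching the two ResponseFormat variants added above; either dict can be supplied as a request's response_format property:

# The "type" field is the discriminator the spec declares above.
json_schema_format = {
    "type": "json_schema",  # -> JsonSchemaResponseFormat
    "json_schema": {
        "type": "object",
        "properties": {"answer": {"type": "string"}},
        "required": ["answer"],
    },
}

grammar_format = {
    "type": "grammar",  # -> GrammarResponseFormat
    "bnf": {"root": '"yes" | "no"'},
}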
View file

@ -1,11 +1,12 @@
---
openapi: 3.1.0
info:
title: Llama Stack Specification
version: v1
description: "This is the specification of the Llama Stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored
to\n best leverage Llama Models."
description: >-
This is the specification of the Llama Stack that provides
a set of endpoints and their corresponding interfaces that are
tailored to
best leverage Llama Models.
servers:
- url: http://any-hosted-llama-stack.com
paths:
@ -108,7 +109,9 @@ paths:
post:
responses:
'200':
description: Chat completion response. **OR** SSE-stream of these events.
description: >-
If stream=False, returns a ChatCompletionResponse with the full completion.
If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk
content:
text/event-stream:
schema:
@ -117,6 +120,8 @@ paths:
- $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
tags:
- Inference
summary: >-
Generate a chat completion for the given messages using the specified model.
parameters: []
requestBody:
content:
@ -128,7 +133,9 @@ paths:
post:
responses:
'200':
description: Completion response. **OR** streamed completion response.
description: >-
If stream=False, returns a CompletionResponse with the full completion.
If stream=True, returns an SSE event stream of CompletionResponseStreamChunk
content:
text/event-stream:
schema:
@ -137,6 +144,8 @@ paths:
- $ref: '#/components/schemas/CompletionResponseStreamChunk'
tags:
- Inference
summary: >-
Generate a completion for the given content using the specified model.
parameters: []
requestBody:
content:
@ -189,8 +198,9 @@ paths:
post:
responses:
'200':
description: A single turn in an interaction with an Agentic System. **OR**
streamed agent turn completion response.
description: >-
A single turn in an interaction with an Agentic System. **OR** streamed
agent turn completion response.
content:
text/event-stream:
schema:
@ -279,13 +289,17 @@ paths:
post:
responses:
'200':
description: OK
description: >-
An array of embeddings, one for each content. Each embedding is a list
of floats.
content:
application/json:
schema:
$ref: '#/components/schemas/EmbeddingsResponse'
tags:
- Inference
summary: >-
Generate embeddings for content pieces using the specified model.
parameters: []
requestBody:
content:
@ -709,7 +723,8 @@ paths:
description: OK
tags:
- ToolRuntime
summary: Index documents so they can be used by the RAG system
summary: >-
Index documents so they can be used by the RAG system
parameters: []
requestBody:
content:
@ -1109,7 +1124,8 @@ paths:
$ref: '#/components/schemas/RAGQueryResult'
tags:
- ToolRuntime
summary: Query the RAG system for context; typically invoked by the agent
summary: >-
Query the RAG system for context; typically invoked by the agent
parameters: []
requestBody:
content:
@ -1341,7 +1357,8 @@ paths:
tags:
- Inspect
parameters: []
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
jsonSchemaDialect: >-
https://json-schema.org/draft/2020-12/schema
components:
schemas:
AppendRowsRequest:
@ -1393,6 +1410,27 @@ components:
- content
- stop_reason
- tool_calls
GrammarResponseFormat:
type: object
properties:
type:
type: string
const: grammar
default: grammar
bnf:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- type
- bnf
GreedySamplingStrategy:
type: object
properties:
@ -1439,6 +1477,27 @@ components:
mapping:
image: '#/components/schemas/ImageContentItem'
text: '#/components/schemas/TextContentItem'
JsonSchemaResponseFormat:
type: object
properties:
type:
type: string
const: json_schema
default: json_schema
json_schema:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- type
- json_schema
Message:
oneOf:
- $ref: '#/components/schemas/UserMessage'
@ -1452,6 +1511,15 @@ components:
system: '#/components/schemas/SystemMessage'
tool: '#/components/schemas/ToolResponseMessage'
assistant: '#/components/schemas/CompletionMessage'
ResponseFormat:
oneOf:
- $ref: '#/components/schemas/JsonSchemaResponseFormat'
- $ref: '#/components/schemas/GrammarResponseFormat'
discriminator:
propertyName: type
mapping:
json_schema: '#/components/schemas/JsonSchemaResponseFormat'
grammar: '#/components/schemas/GrammarResponseFormat'
SamplingParams:
type: object
properties:
@ -1594,16 +1662,28 @@ components:
- json
- function_tag
- python_list
title: This Enum refers to the prompt format for calling custom / zero shot
tools
description: "`json` --\n Refers to the json format for calling tools.\n\
\ The json format takes the form like\n {\n \"type\": \"function\"\
,\n \"function\" : {\n \"name\": \"function_name\",\n \
\ \"description\": \"function_description\",\n \"parameters\"\
: {...}\n }\n }\n\n`function_tag` --\n This is an example of
how you could define\n your own user defined format for making tool calls.\n\
\ The function_tag format looks like this,\n <function=function_name>(parameters)</function>\n
\nThe detailed prompts for each of these formats are added to llama cli"
title: >-
This Enum refers to the prompt format for calling custom / zero shot tools
description: >-
`json` --
Refers to the json format for calling tools.
The json format takes the form like
{
"type": "function",
"function" : {
"name": "function_name",
"description": "function_description",
"parameters": {...}
}
}
`function_tag` --
This is an example of how you could define
your own user defined format for making tool calls.
The function_tag format looks like this,
<function=function_name>(parameters)</function>
The detailed prompts for each of these formats are added to llama cli
ToolResponseMessage:
type: object
properties:
@ -1697,6 +1777,8 @@ components:
$ref: '#/components/schemas/ToolChoice'
tool_prompt_format:
$ref: '#/components/schemas/ToolPromptFormat'
response_format:
$ref: '#/components/schemas/ResponseFormat'
logprobs:
type: object
properties:
@ -1711,13 +1793,35 @@ components:
BatchChatCompletionResponse:
type: object
properties:
completion_message_batch:
batch:
type: array
items:
$ref: '#/components/schemas/CompletionMessage'
$ref: '#/components/schemas/ChatCompletionResponse'
additionalProperties: false
required:
- completion_message_batch
- batch
ChatCompletionResponse:
type: object
properties:
completion_message:
$ref: '#/components/schemas/CompletionMessage'
logprobs:
type: array
items:
$ref: '#/components/schemas/TokenLogProbs'
additionalProperties: false
required:
- completion_message
TokenLogProbs:
type: object
properties:
logprobs_by_token:
type: object
additionalProperties:
type: number
additionalProperties: false
required:
- logprobs_by_token
BatchCompletionRequest:
type: object
properties:
@ -1729,6 +1833,8 @@ components:
$ref: '#/components/schemas/InterleavedContent'
sampling_params:
$ref: '#/components/schemas/SamplingParams'
response_format:
$ref: '#/components/schemas/ResponseFormat'
logprobs:
type: object
properties:
@ -1743,13 +1849,29 @@ components:
BatchCompletionResponse:
type: object
properties:
completion_message_batch:
batch:
type: array
items:
$ref: '#/components/schemas/CompletionMessage'
$ref: '#/components/schemas/CompletionResponse'
additionalProperties: false
required:
- completion_message_batch
- batch
CompletionResponse:
type: object
properties:
content:
type: string
stop_reason:
$ref: '#/components/schemas/StopReason'
logprobs:
type: array
items:
$ref: '#/components/schemas/TokenLogProbs'
additionalProperties: false
required:
- content
- stop_reason
title: Completion response.
CancelTrainingJobRequest:
type: object
properties:
@ -1758,80 +1880,45 @@ components:
additionalProperties: false
required:
- job_uuid
GrammarResponseFormat:
type: object
properties:
type:
type: string
const: grammar
default: grammar
bnf:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- type
- bnf
JsonSchemaResponseFormat:
type: object
properties:
type:
type: string
const: json_schema
default: json_schema
json_schema:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- type
- json_schema
ResponseFormat:
oneOf:
- $ref: '#/components/schemas/JsonSchemaResponseFormat'
- $ref: '#/components/schemas/GrammarResponseFormat'
discriminator:
propertyName: type
mapping:
json_schema: '#/components/schemas/JsonSchemaResponseFormat'
grammar: '#/components/schemas/GrammarResponseFormat'
ChatCompletionRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to use
messages:
type: array
items:
$ref: '#/components/schemas/Message'
description: List of messages in the conversation
sampling_params:
$ref: '#/components/schemas/SamplingParams'
description: >-
Parameters to control the sampling strategy
tools:
type: array
items:
$ref: '#/components/schemas/ToolDefinition'
description: >-
(Optional) List of tool definitions available to the model
tool_choice:
$ref: '#/components/schemas/ToolChoice'
description: >-
(Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto.
tool_prompt_format:
$ref: '#/components/schemas/ToolPromptFormat'
description: >-
(Optional) Specifies how tool definitions are formatted when presenting
to the model
response_format:
$ref: '#/components/schemas/ResponseFormat'
description: >-
(Optional) Grammar specification for guided (structured) decoding
stream:
type: boolean
description: >-
(Optional) If True, generate an SSE event stream of the response. Defaults
to False.
logprobs:
type: object
properties:
@ -1839,23 +1926,13 @@ components:
type: integer
default: 0
additionalProperties: false
description: >-
(Optional) If specified, log probabilities for each token position will
be returned.
additionalProperties: false
required:
- model_id
- messages
ChatCompletionResponse:
type: object
properties:
completion_message:
$ref: '#/components/schemas/CompletionMessage'
logprobs:
type: array
items:
$ref: '#/components/schemas/TokenLogProbs'
additionalProperties: false
required:
- completion_message
title: Chat completion response.
ChatCompletionResponseEvent:
type: object
properties:
@ -1888,7 +1965,6 @@ components:
additionalProperties: false
required:
- event
title: SSE-stream of these events.
ContentDelta:
oneOf:
- $ref: '#/components/schemas/TextDelta'
@ -1927,16 +2003,6 @@ components:
required:
- type
- text
TokenLogProbs:
type: object
properties:
logprobs_by_token:
type: object
additionalProperties:
type: number
additionalProperties: false
required:
- logprobs_by_token
ToolCallDelta:
type: object
properties:
@ -1967,14 +2033,23 @@ components:
properties:
model_id:
type: string
description: The identifier of the model to use
content:
$ref: '#/components/schemas/InterleavedContent'
description: The content to generate a completion for
sampling_params:
$ref: '#/components/schemas/SamplingParams'
description: >-
(Optional) Parameters to control the sampling strategy
response_format:
$ref: '#/components/schemas/ResponseFormat'
description: >-
(Optional) Grammar specification for guided (structured) decoding
stream:
type: boolean
description: >-
(Optional) If True, generate an SSE event stream of the response. Defaults
to False.
logprobs:
type: object
properties:
@ -1982,26 +2057,13 @@ components:
type: integer
default: 0
additionalProperties: false
description: >-
(Optional) If specified, log probabilities for each token position will
be returned.
additionalProperties: false
required:
- model_id
- content
CompletionResponse:
type: object
properties:
content:
type: string
stop_reason:
$ref: '#/components/schemas/StopReason'
logprobs:
type: array
items:
$ref: '#/components/schemas/TokenLogProbs'
additionalProperties: false
required:
- content
- stop_reason
title: Completion response.
CompletionResponseStreamChunk:
type: object
properties:
@ -2558,7 +2620,8 @@ components:
- output_message
- output_attachments
- started_at
title: A single turn in an interaction with an Agentic System.
title: >-
A single turn in an interaction with an Agentic System.
ViolationLevel:
type: string
enum:
@ -2570,10 +2633,14 @@ components:
properties:
model_id:
type: string
description: The identifier of the model to use
contents:
type: array
items:
$ref: '#/components/schemas/InterleavedContent'
description: >-
List of contents to generate embeddings for. Note that content can be
multimodal.
additionalProperties: false
required:
- model_id
@ -2845,7 +2912,8 @@ components:
- session_name
- turns
- started_at
title: A single session of an interaction with an Agentic System.
title: >-
A single session of an interaction with an Agentic System.
AgentStepResponse:
type: object
properties:
@ -3194,7 +3262,8 @@ components:
- provider_resource_id
- provider_id
- type
title: A safety shield resource that can be used to check content
title: >-
A safety shield resource that can be used to check content
Span:
type: object
properties:
@ -4684,8 +4753,9 @@ components:
additionalProperties: false
required:
- synthetic_data
title: Response from the synthetic data generation. Batch of (prompt, response,
score) tuples that pass the threshold.
title: >-
Response from the synthetic data generation. Batch of (prompt, response, score)
tuples that pass the threshold.
VersionInfo:
type: object
properties:
@ -4763,13 +4833,13 @@ tags:
- name: ChatCompletionRequest
description: ''
- name: ChatCompletionResponse
description: Chat completion response.
description: ''
- name: ChatCompletionResponseEvent
description: Chat completion response event.
- name: ChatCompletionResponseEventType
description: ''
- name: ChatCompletionResponseStreamChunk
description: SSE-stream of these events.
description: ''
- name: Checkpoint
description: Checkpoint created during training runs
- name: CompletionInputType
@ -4998,9 +5068,11 @@ tags:
- name: ScoringResult
description: ''
- name: Session
description: A single session of an interaction with an Agentic System.
description: >-
A single session of an interaction with an Agentic System.
- name: Shield
description: A safety shield resource that can be used to check content
description: >-
A safety shield resource that can be used to check content
- name: ShieldCallStep
description: ''
- name: Shields
@ -5028,8 +5100,9 @@ tags:
description: ''
- name: SyntheticDataGeneration (Coming Soon)
- name: SyntheticDataGenerationResponse
description: Response from the synthetic data generation. Batch of (prompt, response,
score) tuples that pass the threshold.
description: >-
Response from the synthetic data generation. Batch of (prompt, response, score)
tuples that pass the threshold.
- name: SystemMessage
description: ''
- name: Telemetry
@ -5067,15 +5140,29 @@ tags:
- name: ToolParameter
description: ''
- name: ToolPromptFormat
description: "This Enum refers to the prompt format for calling custom / zero
shot tools\n\n`json` --\n Refers to the json format for calling tools.\n\
\ The json format takes the form like\n {\n \"type\": \"function\"\
,\n \"function\" : {\n \"name\": \"function_name\",\n \
\ \"description\": \"function_description\",\n \"parameters\"\
: {...}\n }\n }\n\n`function_tag` --\n This is an example of how
you could define\n your own user defined format for making tool calls.\n\
\ The function_tag format looks like this,\n <function=function_name>(parameters)</function>\n
\nThe detailed prompts for each of these formats are added to llama cli"
description: >-
This Enum refers to the prompt format for calling custom / zero shot tools
`json` --
Refers to the json format for calling tools.
The json format takes the form like
{
"type": "function",
"function" : {
"name": "function_name",
"description": "function_description",
"parameters": {...}
}
}
`function_tag` --
This is an example of how you could define
your own user defined format for making tool calls.
The function_tag format looks like this,
<function=function_name>(parameters)</function>
The detailed prompts for each of these formats are added to llama cli
- name: ToolResponse
description: ''
- name: ToolResponseMessage
@ -5090,7 +5177,8 @@ tags:
- name: TrainingConfig
description: ''
- name: Turn
description: A single turn in an interaction with an Agentic System.
description: >-
A single turn in an interaction with an Agentic System.
- name: URL
description: ''
- name: UnionType

View file

@ -7,13 +7,15 @@
from typing import List, Optional, Protocol, runtime_checkable
from llama_models.schema_utils import json_schema_type, webmethod
from pydantic import BaseModel, Field
from pydantic import BaseModel
from llama_stack.apis.inference import (
CompletionMessage,
ChatCompletionResponse,
CompletionResponse,
InterleavedContent,
LogProbConfig,
Message,
ResponseFormat,
SamplingParams,
ToolChoice,
ToolDefinition,
@ -21,35 +23,14 @@ from llama_stack.apis.inference import (
)
@json_schema_type
class BatchCompletionRequest(BaseModel):
model: str
content_batch: List[InterleavedContent]
sampling_params: Optional[SamplingParams] = SamplingParams()
logprobs: Optional[LogProbConfig] = None
@json_schema_type
class BatchCompletionResponse(BaseModel):
completion_message_batch: List[CompletionMessage]
@json_schema_type
class BatchChatCompletionRequest(BaseModel):
model: str
messages_batch: List[List[Message]]
sampling_params: Optional[SamplingParams] = SamplingParams()
# zero-shot tool definitions as input to the model
tools: Optional[List[ToolDefinition]] = Field(default_factory=list)
tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
tool_prompt_format: Optional[ToolPromptFormat] = Field(default=None)
logprobs: Optional[LogProbConfig] = None
batch: List[CompletionResponse]
@json_schema_type
class BatchChatCompletionResponse(BaseModel):
completion_message_batch: List[CompletionMessage]
batch: List[ChatCompletionResponse]
@runtime_checkable
@ -60,6 +41,7 @@ class BatchInference(Protocol):
model: str,
content_batch: List[InterleavedContent],
sampling_params: Optional[SamplingParams] = SamplingParams(),
response_format: Optional[ResponseFormat] = None,
logprobs: Optional[LogProbConfig] = None,
) -> BatchCompletionResponse: ...
@ -73,5 +55,6 @@ class BatchInference(Protocol):
tools: Optional[List[ToolDefinition]] = list,
tool_choice: Optional[ToolChoice] = ToolChoice.auto,
tool_prompt_format: Optional[ToolPromptFormat] = None,
response_format: Optional[ResponseFormat] = None,
logprobs: Optional[LogProbConfig] = None,
) -> BatchChatCompletionResponse: ...

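A minimal runnable sketch of the renamed `batch` fields, using simplified stand-ins for the imported response models (the real ones carry more fields):

from typing import List

from pydantic import BaseModel

class CompletionResponse(BaseModel):
    # Simplified stand-in for llama_stack.apis.inference.CompletionResponse.
    content: str
    stop_reason: str

class BatchCompletionResponse(BaseModel):
    batch: List[CompletionResponse]  # was: completion_message_batch

resp = BatchCompletionResponse(
    batch=[CompletionResponse(content="Hello!", stop_reason="end_of_turn")]
)
print([r.content for r in resp.batch])  # ['Hello!']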
View file

@ -186,7 +186,6 @@ ResponseFormat = register_schema(
)
@json_schema_type
class CompletionRequest(BaseModel):
model: str
content: InterleavedContent
@ -215,23 +214,6 @@ class CompletionResponseStreamChunk(BaseModel):
logprobs: Optional[List[TokenLogProbs]] = None
@json_schema_type
class BatchCompletionRequest(BaseModel):
model: str
content_batch: List[InterleavedContent]
sampling_params: Optional[SamplingParams] = SamplingParams()
response_format: Optional[ResponseFormat] = None
logprobs: Optional[LogProbConfig] = None
@json_schema_type
class BatchCompletionResponse(BaseModel):
"""Batch completion response."""
batch: List[CompletionResponse]
@json_schema_type
class ChatCompletionRequest(BaseModel):
model: str
messages: List[Message]
@ -249,37 +231,15 @@ class ChatCompletionRequest(BaseModel):
@json_schema_type
class ChatCompletionResponseStreamChunk(BaseModel):
"""SSE-stream of these events."""
event: ChatCompletionResponseEvent
@json_schema_type
class ChatCompletionResponse(BaseModel):
"""Chat completion response."""
completion_message: CompletionMessage
logprobs: Optional[List[TokenLogProbs]] = None
@json_schema_type
class BatchChatCompletionRequest(BaseModel):
model: str
messages_batch: List[List[Message]]
sampling_params: Optional[SamplingParams] = SamplingParams()
# zero-shot tool definitions as input to the model
tools: Optional[List[ToolDefinition]] = Field(default_factory=list)
tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
tool_prompt_format: Optional[ToolPromptFormat] = Field(default=None)
logprobs: Optional[LogProbConfig] = None
@json_schema_type
class BatchChatCompletionResponse(BaseModel):
batch: List[ChatCompletionResponse]
@json_schema_type
class EmbeddingsResponse(BaseModel):
embeddings: List[List[float]]
@ -303,7 +263,19 @@ class Inference(Protocol):
response_format: Optional[ResponseFormat] = None,
stream: Optional[bool] = False,
logprobs: Optional[LogProbConfig] = None,
) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]: ...
) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]:
    """Generate a completion for the given content using the specified model.

    :param model_id: The identifier of the model to use
    :param content: The content to generate a completion for
    :param sampling_params: (Optional) Parameters to control the sampling strategy
    :param response_format: (Optional) Grammar specification for guided (structured) decoding
    :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
    :param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
    :returns: If stream=False, returns a CompletionResponse with the full completion.
        If stream=True, returns an SSE event stream of CompletionResponseStreamChunk
    """
    ...
@webmethod(route="/inference/chat-completion", method="POST")
async def chat_completion(
@ -311,7 +283,6 @@ class Inference(Protocol):
model_id: str,
messages: List[Message],
sampling_params: Optional[SamplingParams] = SamplingParams(),
# zero-shot tool definitions as input to the model
tools: Optional[List[ToolDefinition]] = None,
tool_choice: Optional[ToolChoice] = ToolChoice.auto,
tool_prompt_format: Optional[ToolPromptFormat] = None,
@ -320,11 +291,33 @@ class Inference(Protocol):
logprobs: Optional[LogProbConfig] = None,
) -> Union[
    ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]
]: ...
]:
    """Generate a chat completion for the given messages using the specified model.

    :param model_id: The identifier of the model to use
    :param messages: List of messages in the conversation
    :param sampling_params: Parameters to control the sampling strategy
    :param tools: (Optional) List of tool definitions available to the model
    :param tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto.
    :param tool_prompt_format: (Optional) Specifies how tool definitions are formatted when presenting to the model
    :param response_format: (Optional) Grammar specification for guided (structured) decoding
    :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
    :param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
    :returns: If stream=False, returns a ChatCompletionResponse with the full completion.
        If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk
    """
    ...
@webmethod(route="/inference/embeddings", method="POST")
async def embeddings(
self,
model_id: str,
contents: List[InterleavedContent],
) -> EmbeddingsResponse: ...
) -> EmbeddingsResponse:
    """Generate embeddings for content pieces using the specified model.

    :param model_id: The identifier of the model to use
    :param contents: List of contents to generate embeddings for. Note that content can be multimodal.
    :returns: An array of embeddings, one for each content. Each embedding is a list of floats.
    """
    ...
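Finally, a hedged sketch of consuming the two documented return shapes; `client` stands for any hypothetical implementation of the Inference protocol above, and the model identifier and messages are made up:

import asyncio

async def demo(client) -> None:
    messages = [{"role": "user", "content": "Hello"}]

    # stream=False (the default): a single ChatCompletionResponse
    response = await client.chat_completion(
        model_id="example-model", messages=messages
    )
    print(response.completion_message)

    # stream=True: an async iterator of ChatCompletionResponseStreamChunk
    stream = await client.chat_completion(
        model_id="example-model", messages=messages, stream=True
    )
    async for chunk in stream:
        print(chunk.event)

# asyncio.run(demo(client)) once a concrete client is constructed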