Update OpenAPI generator to add param and field documentation

Ashwin Bharambe 2025-01-28 12:27:21 -08:00
parent 9f709387e2
commit ebfa8ad4fb
7 changed files with 525 additions and 397 deletions


@@ -36,6 +36,16 @@ from .pyopenapi.specification import Info, Server  # noqa: E402
from .pyopenapi.utility import Specification  # noqa: E402

+def str_presenter(dumper, data):
+    if data.startswith(f"/{LLAMA_STACK_API_VERSION}") or data.startswith(
+        "#/components/schemas/"
+    ):
+        style = None
+    else:
+        style = ">" if "\n" in data or len(data) > 40 else None
+    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style)

def main(output_dir: str):
    output_dir = Path(output_dir)
    if not output_dir.exists():
@@ -69,7 +79,8 @@ def main(output_dir: str):
        y.sequence_dash_offset = 2
        y.width = 80
        y.allow_unicode = True
-        y.explicit_start = True
+        y.representer.add_representer(str, str_presenter)
+
        y.dump(
            spec.get_json(),
            fp,
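The presenter's effect is easiest to see in isolation: long or multi-line strings get YAML's folded ">" style, while route paths and schema refs stay on one plain line. A minimal, self-contained sketch of the same idea, assuming ruamel.yaml (whose YAML() API the code above matches); LLAMA_STACK_API_VERSION is stubbed in here:

import sys

from ruamel.yaml import YAML

LLAMA_STACK_API_VERSION = "v1"  # stand-in for the real constant

def str_presenter(dumper, data):
    # keep route paths and schema refs on one line so they stay grep-able
    if data.startswith(f"/{LLAMA_STACK_API_VERSION}") or data.startswith(
        "#/components/schemas/"
    ):
        style = None
    else:
        # fold anything long or multi-line into a ">" block scalar
        style = ">" if "\n" in data or len(data) > 40 else None
    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style)

y = YAML(typ="rt")
y.width = 80
y.representer.add_representer(str, str_presenter)
y.dump(
    {
        "path": "/v1/inference/chat-completion",
        "description": "A long description that will be emitted as a folded block scalar instead of a quoted flow scalar.",
    },
    sys.stdout,
)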


@@ -8,6 +8,7 @@ import collections
import hashlib
import ipaddress
import typing
+from dataclasses import field, make_dataclass
from typing import Any, Dict, Set, Union

from ..strong_typing.core import JsonType
@@ -276,6 +277,20 @@ class StatusResponse:
    examples: List[Any] = dataclasses.field(default_factory=list)

+def create_docstring_for_request(
+    request_name: str, fields: List[Tuple[str, type, Any]], doc_params: Dict[str, str]
+) -> str:
+    """Creates a ReST-style docstring for a dynamically generated request dataclass."""
+    lines = ["\n"]  # Short description
+
+    # Add parameter documentation in ReST format
+    for name, type_ in fields:
+        desc = doc_params.get(name, "")
+        lines.append(f":param {name}: {desc}")
+
+    return "\n".join(lines)

class ResponseBuilder:
    content_builder: ContentBuilder
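Given doc_params harvested from the protocol method's docstring, the helper emits one :param line per request field. A hypothetical illustration of its output (names taken from the inference API later in this commit):

doc_params = {
    "model_id": "The identifier of the model to use",
    "messages": "List of messages in the conversation",
}
fields = [("model_id", str), ("messages", list)]

print(create_docstring_for_request("ChatCompletionRequest", fields, doc_params))
#
# :param model_id: The identifier of the model to use
# :param messages: List of messages in the conversation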
@@ -493,11 +508,24 @@ class Generator:
            first = next(iter(op.request_params))
            request_name, request_type = first

-            from dataclasses import make_dataclass
-
            op_name = "".join(word.capitalize() for word in op.name.split("_"))
            request_name = f"{op_name}Request"
-            request_type = make_dataclass(request_name, op.request_params)
+            fields = [
+                (
+                    name,
+                    type_,
+                )
+                for name, type_ in op.request_params
+            ]
+            request_type = make_dataclass(
+                request_name,
+                fields,
+                namespace={
+                    "__doc__": create_docstring_for_request(
+                        request_name, fields, doc_params
+                    )
+                },
+            )

        requestBody = RequestBody(
            content={
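Passing namespace={"__doc__": ...} is the load-bearing detail: the dataclass machinery only synthesizes a default __doc__ when the class has none, so the ReST parameter docs survive on the generated request type and can later be turned into per-property descriptions. A self-contained sketch of that mechanism (field names are illustrative):

import dataclasses
from dataclasses import make_dataclass

def create_docstring_for_request(request_name, fields, doc_params):
    lines = ["\n"]
    for name, _type in fields:
        lines.append(f":param {name}: {doc_params.get(name, '')}")
    return "\n".join(lines)

fields = [("model_id", str), ("stream", bool)]
doc_params = {"model_id": "The identifier of the model to use"}
ChatCompletionRequest = make_dataclass(
    "ChatCompletionRequest",
    fields,
    namespace={
        "__doc__": create_docstring_for_request(
            "ChatCompletionRequest", fields, doc_params
        )
    },
)

print(ChatCompletionRequest.__doc__)  # the :param lines, not the default signature doc
print([f.name for f in dataclasses.fields(ChatCompletionRequest)])  # ['model_id', 'stream']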


@@ -531,6 +531,7 @@ class JsonSchemaGenerator:
            # add property docstring if available
            property_doc = property_docstrings.get(property_name)
            if property_doc:
+                # print(output_name, property_doc)
                property_def.pop("title", None)
                property_def["description"] = property_doc


@@ -190,7 +190,7 @@
      "post": {
        "responses": {
          "200": {
-           "description": "Chat completion response. **OR** SSE-stream of these events.",
+           "description": "If stream=False, returns a ChatCompletionResponse with the full completion. If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk",
            "content": {
              "text/event-stream": {
                "schema": {
@@ -210,6 +210,7 @@
        "tags": [
          "Inference"
        ],
+       "summary": "Generate a chat completion for the given messages using the specified model.",
        "parameters": [],
        "requestBody": {
          "content": {
@@ -227,7 +228,7 @@
      "post": {
        "responses": {
          "200": {
-           "description": "Completion response. **OR** streamed completion response.",
+           "description": "If stream=False, returns a CompletionResponse with the full completion. If stream=True, returns an SSE event stream of CompletionResponseStreamChunk",
            "content": {
              "text/event-stream": {
                "schema": {
@@ -247,6 +248,7 @@
        "tags": [
          "Inference"
        ],
+       "summary": "Generate a completion for the given content using the specified model.",
        "parameters": [],
        "requestBody": {
          "content": {
@@ -485,7 +487,7 @@
      "post": {
        "responses": {
          "200": {
-           "description": "OK",
+           "description": "An array of embeddings, one for each content. Each embedding is a list of floats.",
            "content": {
              "application/json": {
                "schema": {
@@ -498,6 +500,7 @@
        "tags": [
          "Inference"
        ],
+       "summary": "Generate embeddings for content pieces using the specified model.",
        "parameters": [],
        "requestBody": {
          "content": {
@@ -2372,6 +2375,46 @@
          "tool_calls"
        ]
      },
+     "GrammarResponseFormat": {
+       "type": "object",
+       "properties": {
+         "type": {
+           "type": "string",
+           "const": "grammar",
+           "default": "grammar"
+         },
+         "bnf": {
+           "type": "object",
+           "additionalProperties": {
+             "oneOf": [
+               {
+                 "type": "null"
+               },
+               {
+                 "type": "boolean"
+               },
+               {
+                 "type": "number"
+               },
+               {
+                 "type": "string"
+               },
+               {
+                 "type": "array"
+               },
+               {
+                 "type": "object"
+               }
+             ]
+           }
+         }
+       },
+       "additionalProperties": false,
+       "required": [
+         "type",
+         "bnf"
+       ]
+     },
      "GreedySamplingStrategy": {
        "type": "object",
        "properties": {
@@ -2447,6 +2490,46 @@
          }
        }
      },
+     "JsonSchemaResponseFormat": {
+       "type": "object",
+       "properties": {
+         "type": {
+           "type": "string",
+           "const": "json_schema",
+           "default": "json_schema"
+         },
+         "json_schema": {
+           "type": "object",
+           "additionalProperties": {
+             "oneOf": [
+               {
+                 "type": "null"
+               },
+               {
+                 "type": "boolean"
+               },
+               {
+                 "type": "number"
+               },
+               {
+                 "type": "string"
+               },
+               {
+                 "type": "array"
+               },
+               {
+                 "type": "object"
+               }
+             ]
+           }
+         }
+       },
+       "additionalProperties": false,
+       "required": [
+         "type",
+         "json_schema"
+       ]
+     },
      "Message": {
        "oneOf": [
          {
@@ -2472,6 +2555,23 @@
          }
        }
      },
+     "ResponseFormat": {
+       "oneOf": [
+         {
+           "$ref": "#/components/schemas/JsonSchemaResponseFormat"
+         },
+         {
+           "$ref": "#/components/schemas/GrammarResponseFormat"
+         }
+       ],
+       "discriminator": {
+         "propertyName": "type",
+         "mapping": {
+           "json_schema": "#/components/schemas/JsonSchemaResponseFormat",
+           "grammar": "#/components/schemas/GrammarResponseFormat"
+         }
+       }
+     },
      "SamplingParams": {
        "type": "object",
        "properties": {
@@ -2865,6 +2965,9 @@
        "tool_prompt_format": {
          "$ref": "#/components/schemas/ToolPromptFormat"
        },
+       "response_format": {
+         "$ref": "#/components/schemas/ResponseFormat"
+       },
        "logprobs": {
          "type": "object",
          "properties": {
@@ -2885,16 +2988,49 @@
      "BatchChatCompletionResponse": {
        "type": "object",
        "properties": {
-         "completion_message_batch": {
+         "batch": {
            "type": "array",
            "items": {
-             "$ref": "#/components/schemas/CompletionMessage"
+             "$ref": "#/components/schemas/ChatCompletionResponse"
            }
          }
        },
        "additionalProperties": false,
        "required": [
-         "completion_message_batch"
+         "batch"
        ]
      },
+     "ChatCompletionResponse": {
+       "type": "object",
+       "properties": {
+         "completion_message": {
+           "$ref": "#/components/schemas/CompletionMessage"
+         },
+         "logprobs": {
+           "type": "array",
+           "items": {
+             "$ref": "#/components/schemas/TokenLogProbs"
+           }
+         }
+       },
+       "additionalProperties": false,
+       "required": [
+         "completion_message"
+       ]
+     },
+     "TokenLogProbs": {
+       "type": "object",
+       "properties": {
+         "logprobs_by_token": {
+           "type": "object",
+           "additionalProperties": {
+             "type": "number"
+           }
+         }
+       },
+       "additionalProperties": false,
+       "required": [
+         "logprobs_by_token"
+       ]
+     },
      "BatchCompletionRequest": {
@@ -2912,6 +3048,9 @@
        "sampling_params": {
          "$ref": "#/components/schemas/SamplingParams"
        },
+       "response_format": {
+         "$ref": "#/components/schemas/ResponseFormat"
+       },
        "logprobs": {
          "type": "object",
          "properties": {
@@ -2932,18 +3071,41 @@
      "BatchCompletionResponse": {
        "type": "object",
        "properties": {
-         "completion_message_batch": {
+         "batch": {
            "type": "array",
            "items": {
-             "$ref": "#/components/schemas/CompletionMessage"
+             "$ref": "#/components/schemas/CompletionResponse"
            }
          }
        },
        "additionalProperties": false,
        "required": [
-         "completion_message_batch"
+         "batch"
        ]
      },
+     "CompletionResponse": {
+       "type": "object",
+       "properties": {
+         "content": {
+           "type": "string"
+         },
+         "stop_reason": {
+           "$ref": "#/components/schemas/StopReason"
+         },
+         "logprobs": {
+           "type": "array",
+           "items": {
+             "$ref": "#/components/schemas/TokenLogProbs"
+           }
+         }
+       },
+       "additionalProperties": false,
+       "required": [
+         "content",
+         "stop_reason"
+       ],
+       "title": "Completion response."
+     },
      "CancelTrainingJobRequest": {
        "type": "object",
        "properties": {
@@ -2956,135 +3118,46 @@
          "job_uuid"
        ]
      },
-     "GrammarResponseFormat": {
-       "type": "object",
-       "properties": {
-         "type": {
-           "type": "string",
-           "const": "grammar",
-           "default": "grammar"
-         },
-         "bnf": {
-           "type": "object",
-           "additionalProperties": {
-             "oneOf": [
-               {
-                 "type": "null"
-               },
-               {
-                 "type": "boolean"
-               },
-               {
-                 "type": "number"
-               },
-               {
-                 "type": "string"
-               },
-               {
-                 "type": "array"
-               },
-               {
-                 "type": "object"
-               }
-             ]
-           }
-         }
-       },
-       "additionalProperties": false,
-       "required": [
-         "type",
-         "bnf"
-       ]
-     },
-     "JsonSchemaResponseFormat": {
-       "type": "object",
-       "properties": {
-         "type": {
-           "type": "string",
-           "const": "json_schema",
-           "default": "json_schema"
-         },
-         "json_schema": {
-           "type": "object",
-           "additionalProperties": {
-             "oneOf": [
-               {
-                 "type": "null"
-               },
-               {
-                 "type": "boolean"
-               },
-               {
-                 "type": "number"
-               },
-               {
-                 "type": "string"
-               },
-               {
-                 "type": "array"
-               },
-               {
-                 "type": "object"
-               }
-             ]
-           }
-         }
-       },
-       "additionalProperties": false,
-       "required": [
-         "type",
-         "json_schema"
-       ]
-     },
-     "ResponseFormat": {
-       "oneOf": [
-         {
-           "$ref": "#/components/schemas/JsonSchemaResponseFormat"
-         },
-         {
-           "$ref": "#/components/schemas/GrammarResponseFormat"
-         }
-       ],
-       "discriminator": {
-         "propertyName": "type",
-         "mapping": {
-           "json_schema": "#/components/schemas/JsonSchemaResponseFormat",
-           "grammar": "#/components/schemas/GrammarResponseFormat"
-         }
-       }
-     },
      "ChatCompletionRequest": {
        "type": "object",
        "properties": {
          "model_id": {
-           "type": "string"
+           "type": "string",
+           "description": "The identifier of the model to use"
          },
          "messages": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/Message"
-           }
+           },
+           "description": "List of messages in the conversation"
          },
          "sampling_params": {
-           "$ref": "#/components/schemas/SamplingParams"
+           "$ref": "#/components/schemas/SamplingParams",
+           "description": "Parameters to control the sampling strategy"
          },
          "tools": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/ToolDefinition"
-           }
+           },
+           "description": "(Optional) List of tool definitions available to the model"
          },
          "tool_choice": {
-           "$ref": "#/components/schemas/ToolChoice"
+           "$ref": "#/components/schemas/ToolChoice",
+           "description": "(Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto."
          },
          "tool_prompt_format": {
-           "$ref": "#/components/schemas/ToolPromptFormat"
+           "$ref": "#/components/schemas/ToolPromptFormat",
+           "description": "(Optional) Specifies how tool definitions are formatted when presenting to the model"
          },
          "response_format": {
-           "$ref": "#/components/schemas/ResponseFormat"
+           "$ref": "#/components/schemas/ResponseFormat",
+           "description": "(Optional) Grammar specification for guided (structured) decoding"
          },
          "stream": {
-           "type": "boolean"
+           "type": "boolean",
+           "description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
          },
          "logprobs": {
            "type": "object",
@@ -3094,7 +3167,8 @@
              "default": 0
            }
          },
-         "additionalProperties": false
+         "additionalProperties": false,
+         "description": "(Optional) If specified, log probabilities for each token position will be returned."
        }
      },
      "additionalProperties": false,
@@ -3103,25 +3177,6 @@
        "messages"
      ]
    },
-   "ChatCompletionResponse": {
-     "type": "object",
-     "properties": {
-       "completion_message": {
-         "$ref": "#/components/schemas/CompletionMessage"
-       },
-       "logprobs": {
-         "type": "array",
-         "items": {
-           "$ref": "#/components/schemas/TokenLogProbs"
-         }
-       }
-     },
-     "additionalProperties": false,
-     "required": [
-       "completion_message"
-     ],
-     "title": "Chat completion response."
-   },
    "ChatCompletionResponseEvent": {
      "type": "object",
      "properties": {
@@ -3166,8 +3221,7 @@
      "additionalProperties": false,
      "required": [
        "event"
-     ],
-     "title": "SSE-stream of these events."
+     ]
    },
    "ContentDelta": {
      "oneOf": [
@@ -3227,21 +3281,6 @@
        "text"
      ]
    },
-   "TokenLogProbs": {
-     "type": "object",
-     "properties": {
-       "logprobs_by_token": {
-         "type": "object",
-         "additionalProperties": {
-           "type": "number"
-         }
-       }
-     },
-     "additionalProperties": false,
-     "required": [
-       "logprobs_by_token"
-     ]
-   },
    "ToolCallDelta": {
      "type": "object",
      "properties": {
@@ -3284,19 +3323,24 @@
      "type": "object",
      "properties": {
        "model_id": {
-         "type": "string"
+         "type": "string",
+         "description": "The identifier of the model to use"
        },
        "content": {
-         "$ref": "#/components/schemas/InterleavedContent"
+         "$ref": "#/components/schemas/InterleavedContent",
+         "description": "The content to generate a completion for"
        },
        "sampling_params": {
-         "$ref": "#/components/schemas/SamplingParams"
+         "$ref": "#/components/schemas/SamplingParams",
+         "description": "(Optional) Parameters to control the sampling strategy"
        },
        "response_format": {
-         "$ref": "#/components/schemas/ResponseFormat"
+         "$ref": "#/components/schemas/ResponseFormat",
+         "description": "(Optional) Grammar specification for guided (structured) decoding"
        },
        "stream": {
-         "type": "boolean"
+         "type": "boolean",
+         "description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
        },
        "logprobs": {
          "type": "object",
@@ -3306,7 +3350,8 @@
            "default": 0
          }
        },
-       "additionalProperties": false
+       "additionalProperties": false,
+       "description": "(Optional) If specified, log probabilities for each token position will be returned."
      }
    },
    "additionalProperties": false,
@@ -3315,29 +3360,6 @@
      "content"
    ]
  },
- "CompletionResponse": {
-   "type": "object",
-   "properties": {
-     "content": {
-       "type": "string"
-     },
-     "stop_reason": {
-       "$ref": "#/components/schemas/StopReason"
-     },
-     "logprobs": {
-       "type": "array",
-       "items": {
-         "$ref": "#/components/schemas/TokenLogProbs"
-       }
-     }
-   },
-   "additionalProperties": false,
-   "required": [
-     "content",
-     "stop_reason"
-   ],
-   "title": "Completion response."
- },
  "CompletionResponseStreamChunk": {
    "type": "object",
    "properties": {
@@ -4241,13 +4263,15 @@
    "type": "object",
    "properties": {
      "model_id": {
-       "type": "string"
+       "type": "string",
+       "description": "The identifier of the model to use"
      },
      "contents": {
        "type": "array",
        "items": {
          "$ref": "#/components/schemas/InterleavedContent"
-       }
+       },
+       "description": "List of contents to generate embeddings for. Note that content can be multimodal."
      }
    },
    "additionalProperties": false,
@@ -7863,7 +7887,7 @@
    },
    {
      "name": "ChatCompletionResponse",
-     "description": "Chat completion response."
+     "description": ""
    },
    {
      "name": "ChatCompletionResponseEvent",
@@ -7875,7 +7899,7 @@
    },
    {
      "name": "ChatCompletionResponseStreamChunk",
-     "description": "SSE-stream of these events."
+     "description": ""
    },
    {
      "name": "Checkpoint",


@@ -1,11 +1,12 @@
----
openapi: 3.1.0
info:
  title: Llama Stack Specification
  version: v1
-  description: "This is the specification of the Llama Stack that provides\n    \
-    \ a set of endpoints and their corresponding interfaces that are tailored to\n\
-    \    best leverage Llama Models."
+  description: >-
+    This is the specification of the Llama Stack that provides
+    a set of endpoints and their corresponding interfaces that are
+    tailored to
+    best leverage Llama Models.
servers:
- url: http://any-hosted-llama-stack.com
paths:
@@ -108,7 +109,9 @@ paths:
    post:
      responses:
        '200':
-         description: Chat completion response. **OR** SSE-stream of these events.
+         description: >-
+           If stream=False, returns a ChatCompletionResponse with the full completion.
+           If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk
          content:
            text/event-stream:
              schema:
@@ -117,6 +120,8 @@ paths:
                - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
      tags:
        - Inference
+     summary: >-
+       Generate a chat completion for the given messages using the specified model.
      parameters: []
      requestBody:
        content:
@@ -128,7 +133,9 @@ paths:
    post:
      responses:
        '200':
-         description: Completion response. **OR** streamed completion response.
+         description: >-
+           If stream=False, returns a CompletionResponse with the full completion.
+           If stream=True, returns an SSE event stream of CompletionResponseStreamChunk
          content:
            text/event-stream:
              schema:
@@ -137,6 +144,8 @@ paths:
                - $ref: '#/components/schemas/CompletionResponseStreamChunk'
      tags:
        - Inference
+     summary: >-
+       Generate a completion for the given content using the specified model.
      parameters: []
      requestBody:
        content:
@@ -189,8 +198,9 @@ paths:
    post:
      responses:
        '200':
-         description: A single turn in an interaction with an Agentic System. **OR**
-           streamed agent turn completion response.
+         description: >-
+           A single turn in an interaction with an Agentic System. **OR** streamed
+           agent turn completion response.
          content:
            text/event-stream:
              schema:
@@ -279,13 +289,17 @@ paths:
    post:
      responses:
        '200':
-         description: OK
+         description: >-
+           An array of embeddings, one for each content. Each embedding is a list
+           of floats.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EmbeddingsResponse'
      tags:
        - Inference
+     summary: >-
+       Generate embeddings for content pieces using the specified model.
      parameters: []
      requestBody:
        content:
@@ -709,7 +723,8 @@ paths:
          description: OK
      tags:
        - ToolRuntime
-     summary: Index documents so they can be used by the RAG system
+     summary: >-
+       Index documents so they can be used by the RAG system
      parameters: []
      requestBody:
        content:
@@ -1109,7 +1124,8 @@ paths:
                $ref: '#/components/schemas/RAGQueryResult'
      tags:
        - ToolRuntime
-     summary: Query the RAG system for context; typically invoked by the agent
+     summary: >-
+       Query the RAG system for context; typically invoked by the agent
      parameters: []
      requestBody:
        content:
@@ -1341,7 +1357,8 @@ paths:
      tags:
        - Inspect
      parameters: []
-jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
+jsonSchemaDialect: >-
+  https://json-schema.org/draft/2020-12/schema
components:
  schemas:
    AppendRowsRequest:
@@ -1393,6 +1410,27 @@ components:
        - content
        - stop_reason
        - tool_calls
+   GrammarResponseFormat:
+     type: object
+     properties:
+       type:
+         type: string
+         const: grammar
+         default: grammar
+       bnf:
+         type: object
+         additionalProperties:
+           oneOf:
+             - type: 'null'
+             - type: boolean
+             - type: number
+             - type: string
+             - type: array
+             - type: object
+     additionalProperties: false
+     required:
+       - type
+       - bnf
    GreedySamplingStrategy:
      type: object
      properties:
@@ -1439,6 +1477,27 @@ components:
        mapping:
          image: '#/components/schemas/ImageContentItem'
          text: '#/components/schemas/TextContentItem'
+   JsonSchemaResponseFormat:
+     type: object
+     properties:
+       type:
+         type: string
+         const: json_schema
+         default: json_schema
+       json_schema:
+         type: object
+         additionalProperties:
+           oneOf:
+             - type: 'null'
+             - type: boolean
+             - type: number
+             - type: string
+             - type: array
+             - type: object
+     additionalProperties: false
+     required:
+       - type
+       - json_schema
    Message:
      oneOf:
        - $ref: '#/components/schemas/UserMessage'
@@ -1452,6 +1511,15 @@ components:
          system: '#/components/schemas/SystemMessage'
          tool: '#/components/schemas/ToolResponseMessage'
          assistant: '#/components/schemas/CompletionMessage'
+   ResponseFormat:
+     oneOf:
+       - $ref: '#/components/schemas/JsonSchemaResponseFormat'
+       - $ref: '#/components/schemas/GrammarResponseFormat'
+     discriminator:
+       propertyName: type
+       mapping:
+         json_schema: '#/components/schemas/JsonSchemaResponseFormat'
+         grammar: '#/components/schemas/GrammarResponseFormat'
    SamplingParams:
      type: object
      properties:
@@ -1594,16 +1662,28 @@ components:
        - json
        - function_tag
        - python_list
-     title: This Enum refers to the prompt format for calling custom / zero shot
-       tools
-     description: "`json` --\n    Refers to the json format for calling tools.\n\
-       \    The json format takes the form like\n    {\n        \"type\": \"function\",\n\
-       \        \"function\" : {\n            \"name\": \"function_name\",\n     \
-       \       \"description\": \"function_description\",\n            \"parameters\"\
-       : {...}\n        }\n    }\n\n`function_tag` --\n    This is an example of\
-       \ how you could define\n    your own user defined format for making tool calls.\n\
-       \    The function_tag format looks like this,\n    <function=function_name>(parameters)</function>\n\
-       \nThe detailed prompts for each of these formats are added to llama cli"
+     title: >-
+       This Enum refers to the prompt format for calling custom / zero shot tools
+     description: >-
+       `json` --
+       Refers to the json format for calling tools.
+       The json format takes the form like
+       {
+           "type": "function",
+           "function" : {
+               "name": "function_name",
+               "description": "function_description",
+               "parameters": {...}
+           }
+       }
+       `function_tag` --
+       This is an example of how you could define
+       your own user defined format for making tool calls.
+       The function_tag format looks like this,
+       <function=function_name>(parameters)</function>
+       The detailed prompts for each of these formats are added to llama cli
    ToolResponseMessage:
      type: object
      properties:
@@ -1697,6 +1777,8 @@ components:
          $ref: '#/components/schemas/ToolChoice'
        tool_prompt_format:
          $ref: '#/components/schemas/ToolPromptFormat'
+       response_format:
+         $ref: '#/components/schemas/ResponseFormat'
        logprobs:
          type: object
          properties:
@@ -1711,13 +1793,35 @@ components:
    BatchChatCompletionResponse:
      type: object
      properties:
-       completion_message_batch:
+       batch:
          type: array
          items:
-           $ref: '#/components/schemas/CompletionMessage'
+           $ref: '#/components/schemas/ChatCompletionResponse'
      additionalProperties: false
      required:
-       - completion_message_batch
+       - batch
+   ChatCompletionResponse:
+     type: object
+     properties:
+       completion_message:
+         $ref: '#/components/schemas/CompletionMessage'
+       logprobs:
+         type: array
+         items:
+           $ref: '#/components/schemas/TokenLogProbs'
+     additionalProperties: false
+     required:
+       - completion_message
+   TokenLogProbs:
+     type: object
+     properties:
+       logprobs_by_token:
+         type: object
+         additionalProperties:
+           type: number
+     additionalProperties: false
+     required:
+       - logprobs_by_token
    BatchCompletionRequest:
      type: object
      properties:
@@ -1729,6 +1833,8 @@ components:
        $ref: '#/components/schemas/InterleavedContent'
      sampling_params:
        $ref: '#/components/schemas/SamplingParams'
+     response_format:
+       $ref: '#/components/schemas/ResponseFormat'
      logprobs:
        type: object
        properties:
@@ -1743,13 +1849,29 @@ components:
    BatchCompletionResponse:
      type: object
      properties:
-       completion_message_batch:
+       batch:
          type: array
          items:
-           $ref: '#/components/schemas/CompletionMessage'
+           $ref: '#/components/schemas/CompletionResponse'
      additionalProperties: false
      required:
-       - completion_message_batch
+       - batch
+   CompletionResponse:
+     type: object
+     properties:
+       content:
+         type: string
+       stop_reason:
+         $ref: '#/components/schemas/StopReason'
+       logprobs:
+         type: array
+         items:
+           $ref: '#/components/schemas/TokenLogProbs'
+     additionalProperties: false
+     required:
+       - content
+       - stop_reason
+     title: Completion response.
    CancelTrainingJobRequest:
      type: object
      properties:
@@ -1758,80 +1880,45 @@ components:
      additionalProperties: false
      required:
        - job_uuid
-   GrammarResponseFormat:
-     type: object
-     properties:
-       type:
-         type: string
-         const: grammar
-         default: grammar
-       bnf:
-         type: object
-         additionalProperties:
-           oneOf:
-             - type: 'null'
-             - type: boolean
-             - type: number
-             - type: string
-             - type: array
-             - type: object
-     additionalProperties: false
-     required:
-       - type
-       - bnf
-   JsonSchemaResponseFormat:
-     type: object
-     properties:
-       type:
-         type: string
-         const: json_schema
-         default: json_schema
-       json_schema:
-         type: object
-         additionalProperties:
-           oneOf:
-             - type: 'null'
-             - type: boolean
-             - type: number
-             - type: string
-             - type: array
-             - type: object
-     additionalProperties: false
-     required:
-       - type
-       - json_schema
-   ResponseFormat:
-     oneOf:
-       - $ref: '#/components/schemas/JsonSchemaResponseFormat'
-       - $ref: '#/components/schemas/GrammarResponseFormat'
-     discriminator:
-       propertyName: type
-       mapping:
-         json_schema: '#/components/schemas/JsonSchemaResponseFormat'
-         grammar: '#/components/schemas/GrammarResponseFormat'
    ChatCompletionRequest:
      type: object
      properties:
        model_id:
          type: string
+         description: The identifier of the model to use
        messages:
          type: array
          items:
            $ref: '#/components/schemas/Message'
+         description: List of messages in the conversation
        sampling_params:
          $ref: '#/components/schemas/SamplingParams'
+         description: >-
+           Parameters to control the sampling strategy
        tools:
          type: array
          items:
            $ref: '#/components/schemas/ToolDefinition'
+         description: >-
+           (Optional) List of tool definitions available to the model
        tool_choice:
          $ref: '#/components/schemas/ToolChoice'
+         description: >-
+           (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto.
        tool_prompt_format:
          $ref: '#/components/schemas/ToolPromptFormat'
+         description: >-
+           (Optional) Specifies how tool definitions are formatted when presenting
+           to the model
        response_format:
          $ref: '#/components/schemas/ResponseFormat'
+         description: >-
+           (Optional) Grammar specification for guided (structured) decoding
        stream:
          type: boolean
+         description: >-
+           (Optional) If True, generate an SSE event stream of the response. Defaults
+           to False.
        logprobs:
          type: object
          properties:
@@ -1839,23 +1926,13 @@ components:
            type: integer
            default: 0
        additionalProperties: false
+       description: >-
+         (Optional) If specified, log probabilities for each token position will
+         be returned.
    additionalProperties: false
    required:
      - model_id
      - messages
- ChatCompletionResponse:
-   type: object
-   properties:
-     completion_message:
-       $ref: '#/components/schemas/CompletionMessage'
-     logprobs:
-       type: array
-       items:
-         $ref: '#/components/schemas/TokenLogProbs'
-   additionalProperties: false
-   required:
-     - completion_message
-   title: Chat completion response.
  ChatCompletionResponseEvent:
    type: object
    properties:
@@ -1888,7 +1965,6 @@ components:
      additionalProperties: false
      required:
        - event
-     title: SSE-stream of these events.
    ContentDelta:
      oneOf:
        - $ref: '#/components/schemas/TextDelta'
@@ -1927,16 +2003,6 @@ components:
      required:
        - type
        - text
-   TokenLogProbs:
-     type: object
-     properties:
-       logprobs_by_token:
-         type: object
-         additionalProperties:
-           type: number
-     additionalProperties: false
-     required:
-       - logprobs_by_token
    ToolCallDelta:
      type: object
      properties:
@@ -1967,14 +2033,23 @@ components:
      properties:
        model_id:
          type: string
+         description: The identifier of the model to use
        content:
          $ref: '#/components/schemas/InterleavedContent'
+         description: The content to generate a completion for
        sampling_params:
          $ref: '#/components/schemas/SamplingParams'
+         description: >-
+           (Optional) Parameters to control the sampling strategy
        response_format:
          $ref: '#/components/schemas/ResponseFormat'
+         description: >-
+           (Optional) Grammar specification for guided (structured) decoding
        stream:
          type: boolean
+         description: >-
+           (Optional) If True, generate an SSE event stream of the response. Defaults
+           to False.
        logprobs:
          type: object
          properties:
@@ -1982,26 +2057,13 @@ components:
            type: integer
            default: 0
        additionalProperties: false
+       description: >-
+         (Optional) If specified, log probabilities for each token position will
+         be returned.
    additionalProperties: false
    required:
      - model_id
      - content
- CompletionResponse:
-   type: object
-   properties:
-     content:
-       type: string
-     stop_reason:
-       $ref: '#/components/schemas/StopReason'
-     logprobs:
-       type: array
-       items:
-         $ref: '#/components/schemas/TokenLogProbs'
-   additionalProperties: false
-   required:
-     - content
-     - stop_reason
-   title: Completion response.
  CompletionResponseStreamChunk:
    type: object
    properties:
@@ -2558,7 +2620,8 @@ components:
        - output_message
        - output_attachments
        - started_at
-     title: A single turn in an interaction with an Agentic System.
+     title: >-
+       A single turn in an interaction with an Agentic System.
    ViolationLevel:
      type: string
      enum:
@@ -2570,10 +2633,14 @@ components:
    properties:
      model_id:
        type: string
+       description: The identifier of the model to use
      contents:
        type: array
        items:
          $ref: '#/components/schemas/InterleavedContent'
+       description: >-
+         List of contents to generate embeddings for. Note that content can be
+         multimodal.
    additionalProperties: false
    required:
      - model_id
@@ -2845,7 +2912,8 @@ components:
        - session_name
        - turns
        - started_at
-     title: A single session of an interaction with an Agentic System.
+     title: >-
+       A single session of an interaction with an Agentic System.
    AgentStepResponse:
      type: object
      properties:
@@ -3194,7 +3262,8 @@ components:
        - provider_resource_id
        - provider_id
        - type
-     title: A safety shield resource that can be used to check content
+     title: >-
+       A safety shield resource that can be used to check content
    Span:
      type: object
      properties:
@@ -4684,8 +4753,9 @@ components:
      additionalProperties: false
      required:
        - synthetic_data
-     title: Response from the synthetic data generation. Batch of (prompt, response,
-       score) tuples that pass the threshold.
+     title: >-
+       Response from the synthetic data generation. Batch of (prompt, response, score)
+       tuples that pass the threshold.
    VersionInfo:
      type: object
      properties:
@@ -4763,13 +4833,13 @@ tags:
- name: ChatCompletionRequest
  description: ''
- name: ChatCompletionResponse
-  description: Chat completion response.
+  description: ''
- name: ChatCompletionResponseEvent
  description: Chat completion response event.
- name: ChatCompletionResponseEventType
  description: ''
- name: ChatCompletionResponseStreamChunk
-  description: SSE-stream of these events.
+  description: ''
- name: Checkpoint
  description: Checkpoint created during training runs
- name: CompletionInputType
@@ -4998,9 +5068,11 @@ tags:
- name: ScoringResult
  description: ''
- name: Session
-  description: A single session of an interaction with an Agentic System.
+  description: >-
+    A single session of an interaction with an Agentic System.
- name: Shield
-  description: A safety shield resource that can be used to check content
+  description: >-
+    A safety shield resource that can be used to check content
- name: ShieldCallStep
  description: ''
- name: Shields
@@ -5028,8 +5100,9 @@ tags:
  description: ''
- name: SyntheticDataGeneration (Coming Soon)
- name: SyntheticDataGenerationResponse
-  description: Response from the synthetic data generation. Batch of (prompt, response,
-    score) tuples that pass the threshold.
+  description: >-
+    Response from the synthetic data generation. Batch of (prompt, response, score)
+    tuples that pass the threshold.
- name: SystemMessage
  description: ''
- name: Telemetry
@@ -5067,15 +5140,29 @@ tags:
- name: ToolParameter
  description: ''
- name: ToolPromptFormat
-  description: "This Enum refers to the prompt format for calling custom / zero\
-    \ shot tools\n\n`json` --\n    Refers to the json format for calling tools.\n\
-    \    The json format takes the form like\n    {\n        \"type\": \"function\",\n\
-    \        \"function\" : {\n            \"name\": \"function_name\",\n       \
-    \     \"description\": \"function_description\",\n            \"parameters\"\
-    : {...}\n        }\n    }\n\n`function_tag` --\n    This is an example of how\
-    \ you could define\n    your own user defined format for making tool calls.\n\
-    \    The function_tag format looks like this,\n    <function=function_name>(parameters)</function>\n\
-    \nThe detailed prompts for each of these formats are added to llama cli"
+  description: >-
+    This Enum refers to the prompt format for calling custom / zero shot tools
+    `json` --
+    Refers to the json format for calling tools.
+    The json format takes the form like
+    {
+        "type": "function",
+        "function" : {
+            "name": "function_name",
+            "description": "function_description",
+            "parameters": {...}
+        }
+    }
+    `function_tag` --
+    This is an example of how you could define
+    your own user defined format for making tool calls.
+    The function_tag format looks like this,
+    <function=function_name>(parameters)</function>
+    The detailed prompts for each of these formats are added to llama cli
- name: ToolResponse
  description: ''
- name: ToolResponseMessage
@@ -5090,7 +5177,8 @@ tags:
- name: TrainingConfig
  description: ''
- name: Turn
-  description: A single turn in an interaction with an Agentic System.
+  description: >-
+    A single turn in an interaction with an Agentic System.
- name: URL
  description: ''
- name: UnionType
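The rewritten descriptions pin down the streaming contract: with stream=False the endpoint returns one JSON ChatCompletionResponse, with stream=True it returns an SSE stream of ChatCompletionResponseStreamChunk events. A hypothetical client sketch against that contract (base URL, versioned path prefix, and payload fields are assumptions based on the spec above):

import json

import httpx

BASE = "http://any-hosted-llama-stack.com"

def chat_completion(messages, stream=False):
    body = {"model_id": "my-model", "messages": messages, "stream": stream}
    url = f"{BASE}/v1/inference/chat-completion"  # version prefix assumed
    if not stream:
        # stream=False: a single ChatCompletionResponse object
        return httpx.post(url, json=body).json()
    # stream=True: text/event-stream of ChatCompletionResponseStreamChunk
    chunks = []
    with httpx.stream("POST", url, json=body) as resp:
        for line in resp.iter_lines():
            if line.startswith("data: "):
                chunks.append(json.loads(line[len("data: "):]))
    return chunks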


@@ -7,13 +7,15 @@
from typing import List, Optional, Protocol, runtime_checkable

from llama_models.schema_utils import json_schema_type, webmethod
-from pydantic import BaseModel, Field
+from pydantic import BaseModel

from llama_stack.apis.inference import (
-    CompletionMessage,
+    ChatCompletionResponse,
+    CompletionResponse,
    InterleavedContent,
    LogProbConfig,
    Message,
+    ResponseFormat,
    SamplingParams,
    ToolChoice,
    ToolDefinition,
@@ -21,35 +23,14 @@ from llama_stack.apis.inference import (
)

-@json_schema_type
-class BatchCompletionRequest(BaseModel):
-    model: str
-    content_batch: List[InterleavedContent]
-    sampling_params: Optional[SamplingParams] = SamplingParams()
-    logprobs: Optional[LogProbConfig] = None

@json_schema_type
class BatchCompletionResponse(BaseModel):
-    completion_message_batch: List[CompletionMessage]
+    batch: List[CompletionResponse]

-@json_schema_type
-class BatchChatCompletionRequest(BaseModel):
-    model: str
-    messages_batch: List[List[Message]]
-    sampling_params: Optional[SamplingParams] = SamplingParams()
-
-    # zero-shot tool definitions as input to the model
-    tools: Optional[List[ToolDefinition]] = Field(default_factory=list)
-    tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
-    tool_prompt_format: Optional[ToolPromptFormat] = Field(default=None)
-    logprobs: Optional[LogProbConfig] = None

@json_schema_type
class BatchChatCompletionResponse(BaseModel):
-    completion_message_batch: List[CompletionMessage]
+    batch: List[ChatCompletionResponse]

@runtime_checkable
@@ -60,6 +41,7 @@ class BatchInference(Protocol):
        model: str,
        content_batch: List[InterleavedContent],
        sampling_params: Optional[SamplingParams] = SamplingParams(),
+        response_format: Optional[ResponseFormat] = None,
        logprobs: Optional[LogProbConfig] = None,
    ) -> BatchCompletionResponse: ...

@@ -73,5 +55,6 @@ class BatchInference(Protocol):
        tools: Optional[List[ToolDefinition]] = list,
        tool_choice: Optional[ToolChoice] = ToolChoice.auto,
        tool_prompt_format: Optional[ToolPromptFormat] = None,
+        response_format: Optional[ResponseFormat] = None,
        logprobs: Optional[LogProbConfig] = None,
    ) -> BatchChatCompletionResponse: ...
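Note the element type changed along with the field name: batch now holds full response objects (with optional per-item logprobs) rather than bare CompletionMessages. A rough sketch of the new shape, assuming the imports above:

from llama_stack.apis.inference import CompletionResponse, StopReason

item = CompletionResponse(content="Hello!", stop_reason=StopReason.end_of_turn)
resp = BatchCompletionResponse(batch=[item])  # was: completion_message_batch
print(resp.batch[0].logprobs)  # per-item logprobs now ride along (None here)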


@@ -186,7 +186,6 @@ ResponseFormat = register_schema(
)

-@json_schema_type
class CompletionRequest(BaseModel):
    model: str
    content: InterleavedContent
@@ -215,23 +214,6 @@ class CompletionResponseStreamChunk(BaseModel):
    logprobs: Optional[List[TokenLogProbs]] = None

-@json_schema_type
-class BatchCompletionRequest(BaseModel):
-    model: str
-    content_batch: List[InterleavedContent]
-    sampling_params: Optional[SamplingParams] = SamplingParams()
-    response_format: Optional[ResponseFormat] = None
-    logprobs: Optional[LogProbConfig] = None
-
-
-@json_schema_type
-class BatchCompletionResponse(BaseModel):
-    """Batch completion response."""
-
-    batch: List[CompletionResponse]

@json_schema_type
class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[Message]
@@ -249,37 +231,15 @@ class ChatCompletionRequest(BaseModel):

@json_schema_type
class ChatCompletionResponseStreamChunk(BaseModel):
-    """SSE-stream of these events."""
-
    event: ChatCompletionResponseEvent

@json_schema_type
class ChatCompletionResponse(BaseModel):
-    """Chat completion response."""
-
    completion_message: CompletionMessage
    logprobs: Optional[List[TokenLogProbs]] = None

-@json_schema_type
-class BatchChatCompletionRequest(BaseModel):
-    model: str
-    messages_batch: List[List[Message]]
-    sampling_params: Optional[SamplingParams] = SamplingParams()
-
-    # zero-shot tool definitions as input to the model
-    tools: Optional[List[ToolDefinition]] = Field(default_factory=list)
-    tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
-    tool_prompt_format: Optional[ToolPromptFormat] = Field(default=None)
-    logprobs: Optional[LogProbConfig] = None
-
-
-@json_schema_type
-class BatchChatCompletionResponse(BaseModel):
-    batch: List[ChatCompletionResponse]

@json_schema_type
class EmbeddingsResponse(BaseModel):
    embeddings: List[List[float]]
@@ -303,7 +263,19 @@ class Inference(Protocol):
        response_format: Optional[ResponseFormat] = None,
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
-    ) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]: ...
+    ) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]:
+        """Generate a completion for the given content using the specified model.
+
+        :param model_id: The identifier of the model to use
+        :param content: The content to generate a completion for
+        :param sampling_params: (Optional) Parameters to control the sampling strategy
+        :param response_format: (Optional) Grammar specification for guided (structured) decoding
+        :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
+        :param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
+        :returns: If stream=False, returns a CompletionResponse with the full completion.
+                 If stream=True, returns an SSE event stream of CompletionResponseStreamChunk
+        """
+        ...

    @webmethod(route="/inference/chat-completion", method="POST")
    async def chat_completion(
@@ -311,7 +283,6 @@ class Inference(Protocol):
        model_id: str,
        messages: List[Message],
        sampling_params: Optional[SamplingParams] = SamplingParams(),
-        # zero-shot tool definitions as input to the model
        tools: Optional[List[ToolDefinition]] = None,
        tool_choice: Optional[ToolChoice] = ToolChoice.auto,
        tool_prompt_format: Optional[ToolPromptFormat] = None,
@@ -320,11 +291,33 @@ class Inference(Protocol):
        logprobs: Optional[LogProbConfig] = None,
    ) -> Union[
        ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]
-    ]: ...
+    ]:
+        """Generate a chat completion for the given messages using the specified model.
+
+        :param model_id: The identifier of the model to use
+        :param messages: List of messages in the conversation
+        :param sampling_params: Parameters to control the sampling strategy
+        :param tools: (Optional) List of tool definitions available to the model
+        :param tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto.
+        :param tool_prompt_format: (Optional) Specifies how tool definitions are formatted when presenting to the model
+        :param response_format: (Optional) Grammar specification for guided (structured) decoding
+        :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
+        :param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
+        :returns: If stream=False, returns a ChatCompletionResponse with the full completion.
+                 If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk
+        """
+        ...

    @webmethod(route="/inference/embeddings", method="POST")
    async def embeddings(
        self,
        model_id: str,
        contents: List[InterleavedContent],
-    ) -> EmbeddingsResponse: ...
+    ) -> EmbeddingsResponse:
+        """Generate embeddings for content pieces using the specified model.
+
+        :param model_id: The identifier of the model to use
+        :param contents: List of contents to generate embeddings for. Note that content can be multimodal.
+        :returns: An array of embeddings, one for each content. Each embedding is a list of floats.
+        """
+        ...
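These docstrings are what the generator mines for doc_params when it builds the request dataclasses above. The project's actual extraction lives in its docstring tooling; a minimal stand-in parser for the ReST :param convention could look like this (hypothetical helper, not the repository's code):

import inspect
import re
from typing import Dict

def parse_doc_params(func) -> Dict[str, str]:
    """Collect :param name: descriptions from a ReST-style docstring."""
    doc = inspect.getdoc(func) or ""
    params: Dict[str, str] = {}
    current = None
    for line in doc.splitlines():
        match = re.match(r"\s*:param\s+(\w+):\s*(.*)", line)
        if match:
            current = match.group(1)
            params[current] = match.group(2)
        elif current and line.strip() and not line.lstrip().startswith(":"):
            params[current] += " " + line.strip()  # wrapped continuation line
        else:
            current = None
    return params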