# Simple bullet form for ease of reading and iteration.
# Use LLMs to translate this to an OpenAPI spec.

== Schema ==
Message:
  role: str
  text: str
  attachments: List[MediaAttachment]
  eot: bool
  tool_call: bool # if it's a tool call - builtin, custom, or ipython

  # for streaming
  is_complete: bool
  is_header_complete: bool
  metadata: json
MediaAttachment:
  attachment_type: MediaAttachmentType
  data_type: MediaAttachmentDataType
  data: str

MediaAttachmentType: # enum [image, video, audio, text (or file)]
MediaAttachmentDataType: # enum [raw_bytes, filepath, uri]
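# A minimal Python sketch of the message types above (illustration only; the
# use of dataclasses and a plain dict for the `json` metadata field are assumptions):

```python
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List


class MediaAttachmentType(Enum):
    IMAGE = "image"
    VIDEO = "video"
    AUDIO = "audio"
    TEXT = "text"  # or file


class MediaAttachmentDataType(Enum):
    RAW_BYTES = "raw_bytes"
    FILEPATH = "filepath"
    URI = "uri"


@dataclass
class MediaAttachment:
    attachment_type: MediaAttachmentType
    data_type: MediaAttachmentDataType
    data: str  # interpreted per data_type: raw bytes, a filepath, or a URI


@dataclass
class Message:
    role: str
    text: str
    attachments: List[MediaAttachment] = field(default_factory=list)
    eot: bool = True
    tool_call: bool = False  # builtin, custom, or ipython

    # for streaming
    is_complete: bool = True
    is_header_complete: bool = True
    metadata: Dict[str, Any] = field(default_factory=dict)  # stands in for `json`
```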
Completion:
  id: str
  message: Message
  tokens: List[int]
  logprobs: List[float]
  finish_reason: str # enum (stop, safety, max_length, etc.)

Options:
  logprobs: bool
  max_tokens: int
  temperature: float
  top_p: float
  # TODO: Get more options from metagen

TokenUsage:
  input_tokens: int
  output_tokens: int
  total_tokens: int
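# Continuing the sketch above: Completion, Options, and TokenUsage as dataclasses
# (the default values shown are assumptions, not part of the spec):

```python
from __future__ import annotations  # lets `Message` stay a forward reference

from dataclasses import dataclass
from typing import List


@dataclass
class Options:
    logprobs: bool = False
    max_tokens: int = 512  # assumed default
    temperature: float = 1.0
    top_p: float = 1.0


@dataclass
class TokenUsage:
    input_tokens: int
    output_tokens: int
    total_tokens: int


@dataclass
class Completion:
    id: str
    message: Message  # Message as defined in the previous sketch
    tokens: List[int]
    logprobs: List[float]
    finish_reason: str  # "stop", "safety", "max_length", ...
```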
== Callsite ==

callsite:
  chat_completion/
    request_type: post
    description: submit a chat completion request
    request:
      messages: List[Message]
      model: str
      options: Options
      n_completions: int
      # TODO: how to handle tooling control, if any?
      # Add `tools` and `tool_choice` --
      # e.g. "auto": use the model's guess
      # how to force use of a particular tool
      # how to disable builtin tools
      # tools: List[Tool]
      # tool_choice: Any
    response:
      id: str
      candidates: List[Completion] # a list to account for when n_completions > 1
      model_called: str # info on the model that produced this result
      usage: TokenUsage
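# A hedged usage sketch of what a call to this endpoint could look like; the
# base URL, model name, and JSON wire format are assumptions, not part of the spec:

```python
import requests

# Hypothetical base URL -- the spec does not pin one down.
BASE_URL = "http://localhost:5000"

payload = {
    "messages": [
        {
            "role": "user",
            "text": "Hello!",
            "attachments": [],
            "eot": True,
            "tool_call": False,
        }
    ],
    "model": "llama-3-8b",  # assumed model identifier
    "options": {"logprobs": False, "max_tokens": 128, "temperature": 0.7, "top_p": 0.9},
    "n_completions": 1,
}

resp = requests.post(f"{BASE_URL}/chat_completion", json=payload)
body = resp.json()

# Expected response shape per the spec above:
#   body["id"], body["candidates"][0]["message"]["text"],
#   body["model_called"], body["usage"]["total_tokens"]
print(body["candidates"][0]["message"]["text"])
```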
# TODO
# callsite:
#   chat_completion_stream/