llama-stack-mirror/source/openapi.yaml
Raghotham Murthy 8631d90f1e added more docs
2024-07-11 03:11:45 -07:00

2279 lines
62 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

components:
responses: {}
schemas:
# Request body of POST /agentic_system/create.
AgenticSystemCreateRequest:
  type: object
  additionalProperties: false
  properties:
    available_tools:
      type: array
      items:
        type: object
        additionalProperties: false
        properties:
          input_shields:
            type: array
            items:
              $ref: '#/components/schemas/ShieldConfig'
          output_shields:
            type: array
            items:
              $ref: '#/components/schemas/ShieldConfig'
          parameters:
            type: object
            # Each parameter value may be any JSON value.
            additionalProperties:
              oneOf:
                - {type: 'null'}
                - {type: boolean}
                - {type: number}
                - {type: string}
                - {type: array}
                - {type: object}
          tool_name:
            # anyOf rather than oneOf: a built-in name such as "web_search"
            # satisfies BOTH branches (the enum and the plain string), and
            # oneOf requires exactly one match, so built-in names would be
            # rejected. anyOf accepts built-in and custom names alike.
            anyOf:
              - type: string
                enum: [web_search, math, image_gen, code_interpreter]
              - type: string
        required: [tool_name, input_shields, output_shields]
    executable_tools:
      type: array
      uniqueItems: true
      items: {type: string}
    input_shields:
      type: array
      items:
        $ref: '#/components/schemas/ShieldConfig'
    instructions: {type: string}
    memory_bank_uuids:
      type: array
      items: {type: string}
    model:
      type: string
      enum: [llama3_8b_chat, llama3_70b_chat]
    output_shields:
      type: array
      items:
        $ref: '#/components/schemas/ShieldConfig'
    uuid: {type: string}
  required:
    - uuid
    - instructions
    - model
    - available_tools
    - executable_tools
    - memory_bank_uuids
    - input_shields
    - output_shields
# 200 response body of POST /agentic_system/create: one required string field.
AgenticSystemCreateResponse:
  type: object
  additionalProperties: false
  properties:
    agent_uuid: {type: string}
  required: [agent_uuid]
# Request body of POST /agentic_system/execute.
AgenticSystemExecuteRequest:
  type: object
  additionalProperties: false
  properties:
    agent_uuid: {type: string}
    messages:
      type: array
      items:
        $ref: '#/components/schemas/Message'
    # Required even though it declares a default.
    stream: {type: boolean, default: false}
    turn_history:
      type: array
      items:
        $ref: '#/components/schemas/AgenticSystemTurn'
  required: [agent_uuid, messages, turn_history, stream]
# Non-streaming result of /agentic_system/execute: wraps a single turn.
AgenticSystemExecuteResponse:
  type: object
  title: non-stream response from the agentic system.
  additionalProperties: false
  properties:
    turn:
      $ref: '#/components/schemas/AgenticSystemTurn'
  required: [turn]
# One chunk of the streamed /agentic_system/execute response. Only
# event_type, step_uuid and step_type are required; the other fields are
# optional per-event payloads.
AgenticSystemExecuteResponseStreamChunk:
  type: object
  title: Streamed agent execution response.
  additionalProperties: false
  properties:
    event_type:
      type: string
      title: The type of event.
      enum: [step_start, step_end, step_progress]
    response_text_delta: {type: string}
    # References the shared component instead of repeating an identical
    # inline copy of the document schema, so the two cannot drift apart.
    retrieved_document:
      $ref: '#/components/schemas/MemoryBankDocument'
    step_type:
      type: string
      title: The type of execution step.
      enum:
        - model_inference
        - tool_execution
        - safety_filtering
        - memory_retrieval
    step_uuid: {type: string}
    stop_reason:
      type: string
      title: >-
        Stop reasons are used to indicate why the model stopped generating
        text.
      enum: [not_stopped, finished_ok, max_tokens]
    tool_call:
      type: object
      title: A tool call is a request to a tool.
      additionalProperties: false
      properties:
        arguments:
          type: object
          # Argument values may be any JSON value.
          additionalProperties:
            oneOf:
              - {type: 'null'}
              - {type: boolean}
              - {type: number}
              - {type: string}
              - {type: array}
              - {type: object}
        tool_name: {type: string}
      required: [tool_name, arguments]
    tool_response_delta:
      type: object
      additionalProperties: false
      properties:
        # A string, an Attachment, or a list mixing both.
        content:
          oneOf:
            - type: string
            - $ref: '#/components/schemas/Attachment'
            - type: array
              items:
                oneOf:
                  - type: string
                  - $ref: '#/components/schemas/Attachment'
        tool_name: {type: string}
      required: [tool_name, content]
    violation:
      type: object
      additionalProperties: false
      properties:
        details: {type: string}
        suggested_user_response: {type: string}
        violation_type: {type: string}
      required: [violation_type, details]
  required: [event_type, step_uuid, step_type]
# A turn: the user messages, the execution steps taken, and the response.
AgenticSystemTurn:
  type: object
  title: A single turn in an interaction with an Agentic System.
  additionalProperties: false
  properties:
    response_message:
      $ref: '#/components/schemas/Message'
    steps:
      type: array
      items:
        # NOTE(review): every variant's step_type enum admits all four
        # values, so step_type alone does not discriminate the variants;
        # they are told apart by their required/allowed properties
        # (additionalProperties is false on each). Confirm whether each
        # variant should pin step_type to its own default value.
        oneOf:
          # Model-inference step.
          - type: object
            additionalProperties: false
            properties:
              logprobs:
                type: object
                additionalProperties:
                  oneOf:
                    - {type: 'null'}
                    - {type: boolean}
                    - {type: number}
                    - {type: string}
                    - {type: array}
                    - {type: object}
              step_type:
                type: string
                title: The type of execution step.
                default: model_inference
                enum:
                  - model_inference
                  - tool_execution
                  - safety_filtering
                  - memory_retrieval
              text: {type: string}
              uuid: {type: string}
            required: [step_type, uuid, text]
          # Tool-execution step.
          - type: object
            additionalProperties: false
            properties:
              step_type:
                type: string
                title: The type of execution step.
                default: tool_execution
                enum:
                  - model_inference
                  - tool_execution
                  - safety_filtering
                  - memory_retrieval
              tool_calls:
                type: array
                items:
                  type: object
                  title: A tool call is a request to a tool.
                  additionalProperties: false
                  properties:
                    arguments:
                      type: object
                      additionalProperties:
                        oneOf:
                          - {type: 'null'}
                          - {type: boolean}
                          - {type: number}
                          - {type: string}
                          - {type: array}
                          - {type: object}
                    tool_name: {type: string}
                  required: [tool_name, arguments]
              tool_responses:
                type: array
                items:
                  type: object
                  additionalProperties: false
                  properties:
                    content:
                      oneOf:
                        - type: string
                        - $ref: '#/components/schemas/Attachment'
                        - type: array
                          items:
                            oneOf:
                              - type: string
                              - $ref: '#/components/schemas/Attachment'
                    tool_name: {type: string}
                  required: [tool_name, content]
              uuid: {type: string}
            required: [step_type, uuid, tool_calls, tool_responses]
          # Safety-filtering step; the violation object is optional.
          - type: object
            additionalProperties: false
            properties:
              step_type:
                type: string
                title: The type of execution step.
                default: safety_filtering
                enum:
                  - model_inference
                  - tool_execution
                  - safety_filtering
                  - memory_retrieval
              uuid: {type: string}
              violation:
                type: object
                additionalProperties: false
                properties:
                  details: {type: string}
                  suggested_user_response: {type: string}
                  violation_type: {type: string}
                required: [violation_type, details]
            required: [step_type, uuid]
          # Memory-retrieval step.
          - type: object
            additionalProperties: false
            properties:
              documents:
                type: array
                # References the shared component instead of repeating an
                # identical inline copy of the document schema.
                items:
                  $ref: '#/components/schemas/MemoryBankDocument'
              scores:
                type: array
                items: {type: number}
              step_type:
                type: string
                title: The type of execution step.
                default: memory_retrieval
                enum:
                  - model_inference
                  - tool_execution
                  - safety_filtering
                  - memory_retrieval
              uuid: {type: string}
            required: [step_type, uuid, documents, scores]
    user_messages:
      type: array
      items:
        $ref: '#/components/schemas/Message'
  required: [user_messages, steps, response_message]
# A URL plus its MIME type; both fields are required.
Attachment:
  type: object
  title: >-
    Attachments are used to refer to external resources, such as images,
    videos, audio, etc.
  additionalProperties: false
  properties:
    mime_type: {type: string}
    url:
      $ref: '#/components/schemas/URL'
  required: [url, mime_type]
# Request body of POST /inference/batch_chat_completion.
BatchChatCompletionRequest:
  type: object
  additionalProperties: false
  properties:
    available_tools:
      type: array
      items:
        type: object
        additionalProperties: false
        properties:
          input_shields:
            type: array
            items:
              $ref: '#/components/schemas/ShieldConfig'
          output_shields:
            type: array
            items:
              $ref: '#/components/schemas/ShieldConfig'
          parameters:
            type: object
            # Each parameter value may be any JSON value.
            additionalProperties:
              oneOf:
                - {type: 'null'}
                - {type: boolean}
                - {type: number}
                - {type: string}
                - {type: array}
                - {type: object}
          tool_name:
            # anyOf rather than oneOf: a built-in name such as "web_search"
            # satisfies BOTH branches (the enum and the plain string), and
            # oneOf requires exactly one match, so built-in names would be
            # rejected. anyOf accepts built-in and custom names alike.
            anyOf:
              - type: string
                enum: [web_search, math, image_gen, code_interpreter]
              - type: string
        required: [tool_name, input_shields, output_shields]
    batch_dialogs:
      type: array
      items:
        $ref: '#/components/schemas/Dialog'
    logprobs: {type: boolean, default: false}
    max_tokens: {type: integer, default: 0}
    model:
      type: string
      enum: [llama3_8b_chat, llama3_70b_chat]
    sampling_params:
      type: object
      additionalProperties: false
      properties:
        strategy: {type: string, default: greedy}
        temperature: {type: number, default: 0.0}
        top_k: {type: integer, default: 0}
        top_p: {type: number, default: 0.95}
      required: [temperature, strategy, top_p, top_k]
  required:
    - model
    - batch_dialogs
    - sampling_params
    - available_tools
    - max_tokens
    - logprobs
# Request body of POST /inference/batch_completion.
BatchCompletionRequest:
  type: object
  additionalProperties: false
  properties:
    content_batch:
      type: array
      # Each entry is a string, an Attachment, or a list mixing both.
      items:
        oneOf:
          - type: string
          - $ref: '#/components/schemas/Attachment'
          - type: array
            items:
              oneOf:
                - type: string
                - $ref: '#/components/schemas/Attachment'
    logprobs: {type: boolean, default: false}
    max_tokens: {type: integer, default: 0}
    model:
      type: string
      enum: [llama3_8b, llama3_70b]
    sampling_params:
      type: object
      additionalProperties: false
      properties:
        strategy: {type: string, default: greedy}
        temperature: {type: number, default: 0.0}
        top_k: {type: integer, default: 0}
        top_p: {type: number, default: 0.95}
      required: [temperature, strategy, top_p, top_k]
  required: [model, content_batch, sampling_params, max_tokens, logprobs]
# Request body of POST /inference/chat_completion.
ChatCompletionRequest:
  type: object
  additionalProperties: false
  properties:
    available_tools:
      type: array
      items:
        type: object
        additionalProperties: false
        properties:
          input_shields:
            type: array
            items:
              $ref: '#/components/schemas/ShieldConfig'
          output_shields:
            type: array
            items:
              $ref: '#/components/schemas/ShieldConfig'
          parameters:
            type: object
            # Each parameter value may be any JSON value.
            additionalProperties:
              oneOf:
                - {type: 'null'}
                - {type: boolean}
                - {type: number}
                - {type: string}
                - {type: array}
                - {type: object}
          tool_name:
            # anyOf rather than oneOf: a built-in name such as "web_search"
            # satisfies BOTH branches (the enum and the plain string), and
            # oneOf requires exactly one match, so built-in names would be
            # rejected. anyOf accepts built-in and custom names alike.
            anyOf:
              - type: string
                enum: [web_search, math, image_gen, code_interpreter]
              - type: string
        required: [tool_name, input_shields, output_shields]
    dialog:
      $ref: '#/components/schemas/Dialog'
    logprobs: {type: boolean, default: false}
    max_tokens: {type: integer, default: 0}
    model:
      type: string
      enum: [llama3_8b_chat, llama3_70b_chat]
    sampling_params:
      type: object
      additionalProperties: false
      properties:
        strategy: {type: string, default: greedy}
        temperature: {type: number, default: 0.0}
        top_k: {type: integer, default: 0}
        top_p: {type: number, default: 0.95}
      required: [temperature, strategy, top_p, top_k]
    stream: {type: boolean, default: false}
  required:
    - model
    - dialog
    - sampling_params
    - available_tools
    - max_tokens
    - stream
    - logprobs
# Non-streaming chat completion result; content and tool_calls are required.
ChatCompletionResponse:
  type: object
  title: Normal chat completion response.
  additionalProperties: false
  properties:
    # A string, an Attachment, or a list mixing both.
    content:
      oneOf:
        - type: string
        - $ref: '#/components/schemas/Attachment'
        - type: array
          items:
            oneOf:
              - type: string
              - $ref: '#/components/schemas/Attachment'
    logprobs:
      type: object
      additionalProperties:
        oneOf:
          - {type: 'null'}
          - {type: boolean}
          - {type: number}
          - {type: string}
          - {type: array}
          - {type: object}
    stop_reason:
      type: string
      title: >-
        Stop reasons are used to indicate why the model stopped generating
        text.
      enum: [not_stopped, finished_ok, max_tokens]
    tool_calls:
      type: array
      items:
        type: object
        title: A tool call is a request to a tool.
        additionalProperties: false
        properties:
          arguments:
            type: object
            additionalProperties:
              oneOf:
                - {type: 'null'}
                - {type: boolean}
                - {type: number}
                - {type: string}
                - {type: array}
                - {type: object}
          tool_name: {type: string}
        required: [tool_name, arguments]
  required: [content, tool_calls]
# One chunk of a streamed chat completion; only text_delta is required.
ChatCompletionResponseStreamChunk:
  type: object
  title: >-
    Streamed chat completion response. The actual response is a series of
    such objects.
  additionalProperties: false
  properties:
    stop_reason:
      type: string
      title: >-
        Stop reasons are used to indicate why the model stopped generating
        text.
      enum: [not_stopped, finished_ok, max_tokens]
    text_delta: {type: string}
    tool_call:
      type: object
      title: A tool call is a request to a tool.
      additionalProperties: false
      properties:
        arguments:
          type: object
          additionalProperties:
            oneOf:
              - {type: 'null'}
              - {type: boolean}
              - {type: number}
              - {type: string}
              - {type: array}
              - {type: object}
        tool_name: {type: string}
      required: [tool_name, arguments]
  required: [text_delta]
# Request body of POST /inference/completion.
CompletionRequest:
  type: object
  additionalProperties: false
  properties:
    # A string, an Attachment, or a list mixing both.
    content:
      oneOf:
        - type: string
        - $ref: '#/components/schemas/Attachment'
        - type: array
          items:
            oneOf:
              - type: string
              - $ref: '#/components/schemas/Attachment'
    logprobs: {type: boolean, default: false}
    max_tokens: {type: integer, default: 0}
    model:
      type: string
      enum: [llama3_8b, llama3_70b]
    sampling_params:
      type: object
      additionalProperties: false
      properties:
        strategy: {type: string, default: greedy}
        temperature: {type: number, default: 0.0}
        top_k: {type: integer, default: 0}
        top_p: {type: number, default: 0.95}
      required: [temperature, strategy, top_p, top_k]
    stream: {type: boolean, default: false}
  required:
    - content
    - model
    - sampling_params
    - max_tokens
    - stream
    - logprobs
# Non-streaming completion result; only content is required.
CompletionResponse:
  type: object
  title: Normal completion response.
  additionalProperties: false
  properties:
    # A string, an Attachment, or a list mixing both.
    content:
      oneOf:
        - type: string
        - $ref: '#/components/schemas/Attachment'
        - type: array
          items:
            oneOf:
              - type: string
              - $ref: '#/components/schemas/Attachment'
    logprobs:
      type: object
      additionalProperties:
        oneOf:
          - {type: 'null'}
          - {type: boolean}
          - {type: number}
          - {type: string}
          - {type: array}
          - {type: object}
    stop_reason:
      type: string
      title: >-
        Stop reasons are used to indicate why the model stopped generating
        text.
      enum: [not_stopped, finished_ok, max_tokens]
  required: [content]
# One chunk of a streamed completion; only text_delta is required.
CompletionResponseStreamChunk:
  type: object
  title: streamed completion response.
  additionalProperties: false
  properties:
    logprobs:
      type: object
      additionalProperties:
        oneOf:
          - {type: 'null'}
          - {type: boolean}
          - {type: number}
          - {type: string}
          - {type: array}
          - {type: object}
    stop_reason:
      type: string
      title: >-
        Stop reasons are used to indicate why the model stopped generating
        text.
      enum: [not_stopped, finished_ok, max_tokens]
    text_delta: {type: string}
  required: [text_delta]
# Request body of POST /datasets/create: a uuid plus the Dataset itself.
CreateDatasetRequest:
  type: object
  title: Request to create a dataset.
  additionalProperties: false
  properties:
    dataset:
      $ref: '#/components/schemas/Dataset'
    uuid: {type: string}
  required: [uuid, dataset]
# Four required numeric settings; used as PostTrainingRLHFRequest's
# algorithm_config.
DPOAlignmentConfig:
  type: object
  additionalProperties: false
  properties:
    epsilon: {type: number}
    gamma: {type: number}
    reward_clip: {type: number}
    reward_scale: {type: number}
  required: [reward_scale, reward_clip, epsilon, gamma]
# Dataset descriptor: column-name -> column-type map, content URL, metadata.
Dataset:
  type: object
  title: Dataset to be used for training or evaluating language models.
  additionalProperties: false
  properties:
    columns:
      type: object
      # Every column value must be one of the listed type names.
      additionalProperties:
        type: string
        enum: [dialog, text, media, number, json]
    content_url:
      $ref: '#/components/schemas/URL'
    metadata:
      type: object
      additionalProperties:
        oneOf:
          - {type: 'null'}
          - {type: boolean}
          - {type: number}
          - {type: string}
          - {type: array}
          - {type: object}
  required: [columns, content_url, metadata]
# A message together with the list of messages that preceded it.
Dialog:
  type: object
  additionalProperties: false
  properties:
    message:
      $ref: '#/components/schemas/Message'
    message_history:
      type: array
      items:
        $ref: '#/components/schemas/Message'
  required: [message, message_history]
# Same field set as LoraFinetuningConfig and QLoraFinetuningConfig.
DoraFinetuningConfig:
  type: object
  additionalProperties: false
  properties:
    alpha: {type: integer}
    apply_lora_to_mlp: {type: boolean}
    apply_lora_to_output: {type: boolean}
    lora_attn_modules:
      type: array
      items: {type: string}
    rank: {type: integer}
  required:
    - lora_attn_modules
    - apply_lora_to_mlp
    - apply_lora_to_output
    - rank
    - alpha
# A prompt message paired with a list of scored generations for it.
KScoredPromptGenerations:
  type: object
  additionalProperties: false
  properties:
    k_scored_generations:
      type: array
      items:
        $ref: '#/components/schemas/ScoredMessage'
    prompt:
      $ref: '#/components/schemas/Message'
  required: [prompt, k_scored_generations]
# Same field set as QLoraFinetuningConfig and DoraFinetuningConfig.
LoraFinetuningConfig:
  type: object
  additionalProperties: false
  properties:
    alpha: {type: integer}
    apply_lora_to_mlp: {type: boolean}
    apply_lora_to_output: {type: boolean}
    lora_attn_modules:
      type: array
      items: {type: string}
    rank: {type: integer}
  required:
    - lora_attn_modules
    - apply_lora_to_mlp
    - apply_lora_to_output
    - rank
    - alpha
# A memory bank record: uuid and name, both required strings.
MemoryBank:
  type: object
  additionalProperties: false
  properties:
    name: {type: string}
    uuid: {type: string}
  required: [uuid, name]
# A stored document: base64-encoded content, MIME type, uuid and metadata.
MemoryBankDocument:
  type: object
  additionalProperties: false
  properties:
    content: {type: string, contentEncoding: base64}
    metadata:
      type: object
      # Metadata values may be any JSON value.
      additionalProperties:
        oneOf:
          - {type: 'null'}
          - {type: boolean}
          - {type: number}
          - {type: string}
          - {type: array}
          - {type: object}
    mime_type: {type: string}
    uuid: {type: string}
  required: [uuid, content, metadata, mime_type]
# A dialog message. All four fields are required, including the
# tool_calls and tool_responses arrays.
Message:
  type: object
  additionalProperties: false
  properties:
    # A string, an Attachment, or a list mixing both.
    content:
      oneOf:
        - type: string
        - $ref: '#/components/schemas/Attachment'
        - type: array
          items:
            oneOf:
              - type: string
              - $ref: '#/components/schemas/Attachment'
    role:
      type: string
      enum: [system, user, assistant, tool]
    tool_calls:
      type: array
      items:
        type: object
        title: A tool call is a request to a tool.
        additionalProperties: false
        properties:
          arguments:
            type: object
            additionalProperties:
              oneOf:
                - {type: 'null'}
                - {type: boolean}
                - {type: number}
                - {type: string}
                - {type: array}
                - {type: object}
          tool_name: {type: string}
        required: [tool_name, arguments]
    tool_responses:
      type: array
      items:
        type: object
        additionalProperties: false
        properties:
          content:
            oneOf:
              - type: string
              - $ref: '#/components/schemas/Attachment'
              - type: array
                items:
                  oneOf:
                    - type: string
                    - $ref: '#/components/schemas/Attachment'
          tool_name: {type: string}
        required: [tool_name, content]
  required: [role, content, tool_calls, tool_responses]
# Optimizer choice plus three numeric settings; all fields required.
OptimizerConfig:
  type: object
  additionalProperties: false
  properties:
    lr: {type: number}
    lr_min: {type: number}
    optimizer_type:
      type: string
      enum: [adam, adamw, sgd]
    weight_decay: {type: number}
  required: [optimizer_type, lr, lr_min, weight_decay]
# 200 response body of GET /post_training/job/artifacts.
PostTrainingJobArtifactsResponse:
  type: object
  title: Artifacts of a finetuning job.
  additionalProperties: false
  properties:
    checkpoints:
      type: array
      items:
        type: object
        additionalProperties: false
        properties:
          iters: {type: integer}
          path:
            $ref: '#/components/schemas/URL'
        required: [iters, path]
    job_uuid: {type: string}
  required: [job_uuid, checkpoints]
# 200 response body of GET /post_training/job/logs.
PostTrainingJobLogStream:
  type: object
  title: Stream of logs from a finetuning job.
  additionalProperties: false
  properties:
    job_uuid: {type: string}
    log_lines:
      type: array
      items: {type: string}
  required: [job_uuid, log_lines]
# 200 response body of GET /post_training/job/status. The timestamp and
# resources_allocated fields are optional.
PostTrainingJobStatusResponse:
  type: object
  title: Status of a finetuning job.
  additionalProperties: false
  properties:
    checkpoints:
      type: array
      items:
        type: object
        additionalProperties: false
        properties:
          iters: {type: integer}
          path:
            $ref: '#/components/schemas/URL'
        required: [iters, path]
    completed_at: {type: string, format: date-time}
    job_uuid: {type: string}
    resources_allocated:
      type: object
      additionalProperties:
        oneOf:
          - {type: 'null'}
          - {type: boolean}
          - {type: number}
          - {type: string}
          - {type: array}
          - {type: object}
    scheduled_at: {type: string, format: date-time}
    started_at: {type: string, format: date-time}
    status:
      type: string
      enum: [running, completed, failed, scheduled]
  required: [job_uuid, status, checkpoints]
# Request body of POST /post_training/preference_optimize/.
PostTrainingRLHFRequest:
  type: object
  title: Request to finetune a model.
  additionalProperties: false
  properties:
    algorithm:
      type: string
      enum: [dpo]
    algorithm_config:
      $ref: '#/components/schemas/DPOAlignmentConfig'
    dataset:
      $ref: '#/components/schemas/Dataset'
    finetuned_model:
      $ref: '#/components/schemas/URL'
    hyperparam_search_config:
      type: object
      additionalProperties:
        oneOf:
          - {type: 'null'}
          - {type: boolean}
          - {type: number}
          - {type: string}
          - {type: array}
          - {type: object}
    job_uuid: {type: string}
    logger_config:
      type: object
      additionalProperties:
        oneOf:
          - {type: 'null'}
          - {type: boolean}
          - {type: number}
          - {type: string}
          - {type: array}
          - {type: object}
    optimizer_config:
      $ref: '#/components/schemas/OptimizerConfig'
    training_config:
      $ref: '#/components/schemas/TrainingConfig'
    validation_dataset:
      $ref: '#/components/schemas/Dataset'
  required:
    - job_uuid
    - finetuned_model
    - dataset
    - validation_dataset
    - algorithm
    - algorithm_config
    - optimizer_config
    - training_config
    - hyperparam_search_config
    - logger_config
# Request body of POST /post_training/supervised_fine_tune/.
PostTrainingSFTRequest:
  type: object
  title: Request to finetune a model.
  additionalProperties: false
  properties:
    algorithm:
      type: string
      enum: [full, lora, qlora, dora]
    algorithm_config:
      # anyOf rather than oneOf: the three referenced schemas declare the
      # same properties and required lists, so any valid config satisfies
      # all three at once. oneOf demands exactly one match and would
      # therefore reject every config.
      # NOTE(review): algorithm_config is required, yet no option here
      # corresponds to the `full` algorithm — confirm what should be sent.
      anyOf:
        - $ref: '#/components/schemas/LoraFinetuningConfig'
        - $ref: '#/components/schemas/QLoraFinetuningConfig'
        - $ref: '#/components/schemas/DoraFinetuningConfig'
    dataset:
      $ref: '#/components/schemas/Dataset'
    hyperparam_search_config:
      type: object
      additionalProperties:
        oneOf:
          - {type: 'null'}
          - {type: boolean}
          - {type: number}
          - {type: string}
          - {type: array}
          - {type: object}
    job_uuid: {type: string}
    logger_config:
      type: object
      additionalProperties:
        oneOf:
          - {type: 'null'}
          - {type: boolean}
          - {type: number}
          - {type: string}
          - {type: array}
          - {type: object}
    model:
      type: string
      enum: [llama3_8b, llama3_70b]
    optimizer_config:
      $ref: '#/components/schemas/OptimizerConfig'
    training_config:
      $ref: '#/components/schemas/TrainingConfig'
    validation_dataset:
      $ref: '#/components/schemas/Dataset'
  required:
    - job_uuid
    - model
    - dataset
    - validation_dataset
    - algorithm
    - algorithm_config
    - optimizer_config
    - training_config
    - hyperparam_search_config
    - logger_config
# Same field set as LoraFinetuningConfig and DoraFinetuningConfig.
QLoraFinetuningConfig:
  type: object
  additionalProperties: false
  properties:
    alpha: {type: integer}
    apply_lora_to_mlp: {type: boolean}
    apply_lora_to_output: {type: boolean}
    lora_attn_modules:
      type: array
      items: {type: string}
    rank: {type: integer}
  required:
    - lora_attn_modules
    - apply_lora_to_mlp
    - apply_lora_to_output
    - rank
    - alpha
# Request body of POST /reward_scoring/score.
RewardScoringRequest:
  type: object
  title: >-
    Request to score a reward function. A list of prompts and a list of
    responses per prompt.
  additionalProperties: false
  properties:
    model:
      type: string
      enum: [llama3_405b_reward]
    prompt_generations:
      type: array
      # Each entry pairs one dialog with the generations to score for it.
      items:
        type: object
        additionalProperties: false
        properties:
          dialog:
            $ref: '#/components/schemas/Dialog'
          k_generations:
            type: array
            items:
              $ref: '#/components/schemas/Message'
        required: [dialog, k_generations]
  required: [prompt_generations, model]
# 200 response body of POST /reward_scoring/score.
RewardScoringResponse:
  type: object
  title: >-
    Response from the reward scoring. Batch of (prompt, response, score)
    tuples that pass the threshold.
  additionalProperties: false
  properties:
    scored_generations:
      type: array
      items:
        $ref: '#/components/schemas/KScoredPromptGenerations'
  required: [scored_generations]
# A Message together with its numeric score.
ScoredMessage:
  type: object
  additionalProperties: false
  properties:
    message:
      $ref: '#/components/schemas/Message'
    score: {type: number}
  required: [message, score]
# A shield selection plus a free-form params map; both are required.
ShieldConfig:
  type: object
  additionalProperties: false
  properties:
    params:
      type: object
      # Param values may be any JSON value.
      additionalProperties:
        oneOf:
          - {type: 'null'}
          - {type: boolean}
          - {type: number}
          - {type: string}
          - {type: array}
          - {type: object}
    shield_type:
      type: string
      title: The type of safety shield.
      enum: [llama_guard, prompt_guard, code_guard]
  required: [shield_type, params]
# Request body of POST /synthetic_data_generation/generate.
SyntheticDataGenerationRequest:
  type: object
  title: >-
    Request to generate synthetic data. A small batch of prompts and a
    filtering function
  additionalProperties: false
  properties:
    filtering_function:
      type: string
      title: The type of filtering function.
      default: none
      enum:
        - none
        - random
        - top_k
        - top_p
        - top_k_top_p
        - sigmoid
    prompts:
      type: array
      items:
        $ref: '#/components/schemas/Message'
    # Declared only as a generic object; no properties are specified here.
    reward_scoring:
      type: object
  required: [prompts, filtering_function]
# Synthetic data result; statistics is optional, synthetic_data is required.
SyntheticDataGenerationResponse:
  type: object
  title: >-
    Response from the synthetic data generation. Batch of (prompt, response,
    score) tuples that pass the threshold.
  additionalProperties: false
  properties:
    statistics:
      type: object
      additionalProperties:
        oneOf:
          - {type: 'null'}
          - {type: boolean}
          - {type: number}
          - {type: string}
          - {type: array}
          - {type: object}
    synthetic_data:
      type: array
      items:
        $ref: '#/components/schemas/KScoredPromptGenerations'
  required: [synthetic_data]
# Training-loop settings shared by the SFT and RLHF requests; all required.
TrainingConfig:
  type: object
  additionalProperties: false
  properties:
    batch_size: {type: integer}
    enable_activation_checkpointing: {type: boolean}
    fsdp_cpu_offload: {type: boolean}
    memory_efficient_fsdp_wrap: {type: boolean}
    n_epochs: {type: integer}
    n_iters: {type: integer}
    shuffle: {type: boolean}
  required:
    - n_epochs
    - batch_size
    - shuffle
    - n_iters
    - enable_activation_checkpointing
    - memory_efficient_fsdp_wrap
    - fsdp_cpu_offload
# A URI string that must begin with http://, https://, file:// or data:.
URL:
  type: string
  format: uri
  pattern: ^(https?://|file://|data:)
# API metadata. The description is kept as a double-quoted scalar so its
# embedded newlines and <br> tags are unchanged; the only text change is the
# possessive "Meta's" (previously "Metas") in two places.
info:
  description: "Meta has built out a fairly sophisticated platform internally to post\
    \ train, evaluate, and \n serve Llama models to support Meta's\
    \ products. Given the newer capabilities of the llama models, \n \
    \ the model development and model serving capabilities of the platform need\
    \ to be enhanced in \n specific ways in order to best leverage\
    \ the models. For example, the inference platform needs \n to support\
    \ code execution to take advantage of the built-in knowledge of tools of the model.\
    \ \n The largest models are of high enough quality to be used to\
    \ generate synthetic data or be used \n as reward models. There\
    \ are specific fine tuning and quantization techniques that we have found \n \
    \ result in the best performing Llama models. We would like to share\
    \ ways in which an LLM Ops \n toolchain can be designed by leveraging\
    \ our learnings in getting Llama models to power Meta's products.\n \
    \ <br>\n In addition, the Llama 3 models Meta will release\
    \ in July should not just be seen as a model, but \n really as\
    \ a system starting the transition towards an entity capable of performing \"\
    agentic\" tasks \n which require the ability to act as the central\
    \ planner and break a task down and perform multi-step \n reasoning\
    \ and call tools for specific operations. In addition, there needs to be general\
    \ model-level \n safety checks as well as task-specific safety\
    \ checks that are performed at a system level. \n <br>\n \
    \ We are defining the Llama Stack as a set of APIs and standards by synthesizing\
    \ our learnings while \n working with Llama models. The APIs are\
    \ divided into the llama-toolchain-api and the llama-agentic-system-api. \n \
    \ These APIs provide a coherent way for model developers to fine\
    \ tune and serve Llama models, and agentic app \n developers to\
    \ leverage all the capabilities of the Llama models seamlessly. We would like\
    \ to work with the \n ecosystem to enhance and simplify the API.\
    \ In addition, we will be releasing a plug-in architecture to allow \n \
    \ creating distributions of the llama stack with different implementations.\n\
    \ <br>\n This is the specification of the llama\
    \ stack that provides \n a set of endpoints and their corresponding\
    \ interfaces that are tailored to \n best leverage Llama Models.\
    \ The specification is still in draft and subject to change."
  title: '[DRAFT] Llama Stack Specification'
  # Quoted so the version always loads as a string.
  version: '0.0.1'
# Spec version and the JSON Schema dialect used by the schemas in this file.
openapi: '3.1.0'
jsonSchemaDialect: 'https://json-schema.org/draft/2020-12/schema'
paths:
# POST: body AgenticSystemCreateRequest -> 200 AgenticSystemCreateResponse.
/agentic_system/create:
  post:
    tags: [AgenticSystem]
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/AgenticSystemCreateRequest'
    responses:
      '200':
        description: OK
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AgenticSystemCreateResponse'
# DELETE with a required query parameter; the 200 response has no body.
# NOTE(review): the parameter is named agent_id, while the create response
# and execute request call the identifier agent_uuid — confirm the intended
# name.
/agentic_system/delete:
  delete:
    tags: [AgenticSystem]
    parameters:
      - name: agent_id
        in: query
        required: true
        schema: {type: string}
    responses:
      '200':
        description: OK
# POST: body AgenticSystemExecuteRequest -> 200 with either the full
# response or a stream chunk, both declared under application/json.
/agentic_system/execute:
  post:
    tags: [AgenticSystem]
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/AgenticSystemExecuteRequest'
    responses:
      '200':
        description: >-
          non-stream response from the agentic system. **OR** Streamed
          agent execution response.
        content:
          application/json:
            schema:
              oneOf:
                - $ref: '#/components/schemas/AgenticSystemExecuteResponse'
                - $ref: '#/components/schemas/AgenticSystemExecuteResponseStreamChunk'
# POST: body CreateDatasetRequest; the 200 response has no body.
/datasets/create:
  post:
    tags: [Datasets]
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CreateDatasetRequest'
    responses:
      '200':
        description: OK
# DELETE with a required dataset_id query parameter; 200 has no body.
# NOTE(review): CreateDatasetRequest names its identifier uuid — confirm
# dataset_id refers to the same value.
/datasets/delete:
  delete:
    tags: [Datasets]
    parameters:
      - name: dataset_id
        in: query
        required: true
        schema: {type: string}
    responses:
      '200':
        description: OK
# GET with a required dataset_id query parameter -> 200 Dataset.
/datasets/get:
  get:
    tags: [Datasets]
    parameters:
      - name: dataset_id
        in: query
        required: true
        schema: {type: string}
    responses:
      '200':
        description: OK
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Dataset'
# POST: body BatchChatCompletionRequest -> 200 application/jsonl whose
# schema is ChatCompletionResponse.
/inference/batch_chat_completion:
  post:
    tags: [Inference]
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/BatchChatCompletionRequest'
    responses:
      '200':
        description: OK
        content:
          application/jsonl:
            schema:
              $ref: '#/components/schemas/ChatCompletionResponse'
# POST: body BatchCompletionRequest -> 200 application/jsonl whose schema
# is CompletionResponse.
/inference/batch_completion:
  post:
    tags: [Inference]
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/BatchCompletionRequest'
    responses:
      '200':
        description: OK
        content:
          application/jsonl:
            schema:
              $ref: '#/components/schemas/CompletionResponse'
# POST: body ChatCompletionRequest -> 200 with either the full response or
# a stream chunk, both declared under application/json.
/inference/chat_completion:
  post:
    tags: [Inference]
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ChatCompletionRequest'
    responses:
      '200':
        description: >-
          Normal chat completion response. **OR** Streamed chat completion
          response. The actual response is a series of such objects.
        content:
          application/json:
            schema:
              oneOf:
                - $ref: '#/components/schemas/ChatCompletionResponse'
                - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
# POST: body CompletionRequest -> 200 with either the full response or a
# stream chunk, both declared under application/json.
/inference/completion:
  post:
    tags: [Inference]
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CompletionRequest'
    responses:
      '200':
        description: >-
          Normal completion response. **OR** streamed completion response.
        content:
          application/json:
            schema:
              oneOf:
                - $ref: '#/components/schemas/CompletionResponse'
                - $ref: '#/components/schemas/CompletionResponseStreamChunk'
# POST (not DELETE): bank_uuid query parameter plus a JSON array of strings
# as the body -> 200 application/jsonl with a string schema.
/memory_bank/delete:
  post:
    tags: [MemoryBanks]
    parameters:
      - name: bank_uuid
        in: query
        required: true
        schema: {type: string}
    requestBody:
      required: true
      content:
        application/json:
          schema:
            type: array
            items: {type: string}
    responses:
      '200':
        description: OK
        content:
          application/jsonl:
            schema: {type: string}
# POST: bank_uuid query parameter plus a JSON array of strings as the body
# -> 200 application/jsonl whose schema is MemoryBankDocument.
/memory_bank/get:
  post:
    tags: [MemoryBanks]
    parameters:
      - name: bank_uuid
        in: query
        required: true
        schema: {type: string}
    requestBody:
      required: true
      content:
        application/json:
          schema:
            type: array
            items: {type: string}
    responses:
      '200':
        description: OK
        content:
          application/jsonl:
            schema:
              $ref: '#/components/schemas/MemoryBankDocument'
# POST: bank_uuid query parameter plus a JSON array of documents as the
# body; the 200 response has no body.
/memory_bank/insert:
  post:
    tags: [MemoryBanks]
    parameters:
      - name: bank_uuid
        in: query
        required: true
        schema: {type: string}
    requestBody:
      required: true
      content:
        application/json:
          schema:
            type: array
            # References the shared component (as /memory_bank/get does)
            # instead of repeating an identical inline document schema.
            items:
              $ref: '#/components/schemas/MemoryBankDocument'
    responses:
      '200':
        description: OK
# POST: bank_uuid query parameter plus a JSON array of documents as the
# body; the 200 response has no body.
/memory_bank/update:
  post:
    tags: [MemoryBanks]
    parameters:
      - name: bank_uuid
        in: query
        required: true
        schema: {type: string}
    requestBody:
      required: true
      content:
        application/json:
          schema:
            type: array
            # References the shared component (as /memory_bank/get does)
            # instead of repeating an identical inline document schema.
            items:
              $ref: '#/components/schemas/MemoryBankDocument'
    responses:
      '200':
        description: OK
# POST: bank_uuid and bank_name query parameters plus a JSON array of
# documents as the body; the 200 response has no body.
/memory_banks/create:
  post:
    tags: [MemoryBanks]
    parameters:
      - name: bank_uuid
        in: query
        required: true
        schema: {type: string}
      - name: bank_name
        in: query
        required: true
        schema: {type: string}
    requestBody:
      required: true
      content:
        application/json:
          schema:
            type: array
            # References the shared component (as /memory_bank/get does)
            # instead of repeating an identical inline document schema.
            items:
              $ref: '#/components/schemas/MemoryBankDocument'
    responses:
      '200':
        description: OK
# DELETE with a required bank_uuid query parameter -> 200 JSON string.
/memory_banks/drop:
  delete:
    tags: [MemoryBanks]
    parameters:
      - name: bank_uuid
        in: query
        required: true
        schema: {type: string}
    responses:
      '200':
        description: OK
        content:
          application/json:
            schema: {type: string}
# GET /memory_banks/get
# Takes no parameters. The 200 response uses the `application/jsonl` media
# type with the MemoryBank component as its schema.
# NOTE(review): presumably one MemoryBank object per line - confirm against
# the server implementation.
/memory_banks/get:
  get:
    tags:
      - MemoryBanks
    parameters: []
    responses:
      '200':
        description: OK
        content:
          application/jsonl:
            schema:
              $ref: '#/components/schemas/MemoryBank'
# GET /post_training/job/artifacts
# Required string query parameter `job_uuid`. The 200 response is JSON
# described by the PostTrainingJobArtifactsResponse component.
/post_training/job/artifacts:
  get:
    tags:
      - PostTraining
    parameters:
      - name: job_uuid
        in: query
        required: true
        schema:
          type: string
    responses:
      '200':
        description: OK
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
# GET /post_training/job/logs
# Required string query parameter `job_uuid`. The 200 response is declared
# as application/json described by the PostTrainingJobLogStream component.
/post_training/job/logs:
  get:
    tags:
      - PostTraining
    parameters:
      - name: job_uuid
        in: query
        required: true
        schema:
          type: string
    responses:
      '200':
        description: OK
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PostTrainingJobLogStream'
# GET /post_training/job/status
# Required string query parameter `job_uuid`. The 200 response is JSON
# described by the PostTrainingJobStatusResponse component.
/post_training/job/status:
  get:
    tags:
      - PostTraining
    parameters:
      - name: job_uuid
        in: query
        required: true
        schema:
          type: string
    responses:
      '200':
        description: OK
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PostTrainingJobStatusResponse'
# POST /post_training/preference_optimize/
# No parameters. Requires a JSON body described by the PostTrainingRLHFRequest
# component; the only declared response is a 200 without a body.
# The trailing slash is part of the published path key and is kept as-is.
/post_training/preference_optimize/:
  post:
    tags:
      - PostTraining
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/PostTrainingRLHFRequest'
    responses:
      '200':
        description: OK
# POST /post_training/supervised_fine_tune/
# No parameters. Requires a JSON body described by the PostTrainingSFTRequest
# component; the only declared response is a 200 without a body.
# The trailing slash is part of the published path key and is kept as-is.
/post_training/supervised_fine_tune/:
  post:
    tags:
      - PostTraining
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/PostTrainingSFTRequest'
    responses:
      '200':
        description: OK
# POST /reward_scoring/score
# No parameters. Requires a JSON body described by the RewardScoringRequest
# component and answers 200 with JSON described by RewardScoringResponse.
/reward_scoring/score:
  post:
    tags:
      - RewardScoring
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/RewardScoringRequest'
    responses:
      '200':
        description: OK
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RewardScoringResponse'
# POST /synthetic_data_generation/generate
# No parameters. Requires a JSON body described by the
# SyntheticDataGenerationRequest component and answers 200 with JSON
# described by SyntheticDataGenerationResponse.
/synthetic_data_generation/generate:
  post:
    tags:
      - SyntheticDataGeneration
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/SyntheticDataGenerationRequest'
    responses:
      '200':
        description: OK
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/SyntheticDataGenerationResponse'
# Security requirement: the scheme named `Default`, with an empty scope list.
# NOTE(review): the scheme definition itself is not visible in this part of
# the file - presumably under components.securitySchemes; confirm it exists.
security:
  - Default: []
# Single server entry. NOTE(review): the host name reads like a placeholder
# rather than a real deployment - confirm before publishing.
servers:
  - url: 'http://any-hosted-llama-stack.com'
# Tag declarations. The first seven entries carry only a name; these are the
# names that operations and the `Operations` group of x-tagGroups refer to.
tags:
- name: SyntheticDataGeneration
- name: Inference
- name: MemoryBanks
- name: AgenticSystem
- name: Datasets
- name: RewardScoring
- name: PostTraining
# Every remaining entry is named after a component schema and its description
# embeds a <SchemaDefinition schemaRef="..."/> element pointing at that schema,
# optionally preceded by a sentence of prose.
# NOTE(review): <SchemaDefinition> looks like markup consumed by the doc
# renderer (e.g. Redoc) - confirm against the tool that publishes this spec.
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
name: ShieldConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
/>
name: AgenticSystemCreateRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateResponse"
/>
name: AgenticSystemCreateResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemExecuteRequest"
/>
name: AgenticSystemExecuteRequest
- description: 'A single turn in an interaction with an Agentic System.
<SchemaDefinition schemaRef="#/components/schemas/AgenticSystemTurn" />'
name: AgenticSystemTurn
- description: 'Attachments are used to refer to external resources, such as images,
videos, audio, etc.
<SchemaDefinition schemaRef="#/components/schemas/Attachment" />'
name: Attachment
- description: <SchemaDefinition schemaRef="#/components/schemas/Message" />
name: Message
- description: <SchemaDefinition schemaRef="#/components/schemas/URL" />
name: URL
- description: 'non-stream response from the agentic system.
<SchemaDefinition schemaRef="#/components/schemas/AgenticSystemExecuteResponse"
/>'
name: AgenticSystemExecuteResponse
- description: 'Streamed agent execution response.
<SchemaDefinition schemaRef="#/components/schemas/AgenticSystemExecuteResponseStreamChunk"
/>'
name: AgenticSystemExecuteResponseStreamChunk
- description: 'Request to create a dataset.
<SchemaDefinition schemaRef="#/components/schemas/CreateDatasetRequest" />'
name: CreateDatasetRequest
- description: 'Dataset to be used for training or evaluating language models.
<SchemaDefinition schemaRef="#/components/schemas/Dataset" />'
name: Dataset
- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryBank" />
name: MemoryBank
- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryBankDocument"
/>
name: MemoryBankDocument
- description: 'Artifacts of a finetuning job.
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingJobArtifactsResponse"
/>'
name: PostTrainingJobArtifactsResponse
- description: 'Status of a finetuning job.
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingJobStatusResponse"
/>'
name: PostTrainingJobStatusResponse
- description: 'Stream of logs from a finetuning job.
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingJobLogStream" />'
name: PostTrainingJobLogStream
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
/>
name: BatchChatCompletionRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/Dialog" />
name: Dialog
- description: 'Normal chat completion response.
<SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponse" />'
name: ChatCompletionResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchCompletionRequest"
/>
name: BatchCompletionRequest
- description: 'Normal completion response.
<SchemaDefinition schemaRef="#/components/schemas/CompletionResponse" />'
name: CompletionResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
/>
name: ChatCompletionRequest
- description: 'Streamed chat completion response. The actual response is a series
of such objects.
<SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseStreamChunk"
/>'
name: ChatCompletionResponseStreamChunk
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionRequest"
/>
name: CompletionRequest
- description: 'streamed completion response.
<SchemaDefinition schemaRef="#/components/schemas/CompletionResponseStreamChunk"
/>'
name: CompletionResponseStreamChunk
- description: 'Request to generate synthetic data. A small batch of prompts and a
filtering function
<SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationRequest"
/>'
name: SyntheticDataGenerationRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/KScoredPromptGenerations"
/>
name: KScoredPromptGenerations
- description: <SchemaDefinition schemaRef="#/components/schemas/ScoredMessage" />
name: ScoredMessage
- description: 'Response from the synthetic data generation. Batch of (prompt, response,
score) tuples that pass the threshold.
<SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationResponse"
/>'
name: SyntheticDataGenerationResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/DPOAlignmentConfig"
/>
name: DPOAlignmentConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/OptimizerConfig"
/>
name: OptimizerConfig
# NOTE(review): PostTrainingRLHFRequest (here) and PostTrainingSFTRequest
# (further down) share the identical sentence "Request to finetune a model." -
# confirm whether the two should be described differently.
- description: 'Request to finetune a model.
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingRLHFRequest" />'
name: PostTrainingRLHFRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/TrainingConfig" />
name: TrainingConfig
- description: 'Request to score a reward function. A list of prompts and a list of
responses per prompt.
<SchemaDefinition schemaRef="#/components/schemas/RewardScoringRequest" />'
name: RewardScoringRequest
# NOTE(review): the second sentence below is word-for-word the one used for
# SyntheticDataGenerationResponse - confirm it is accurate for reward scoring.
- description: 'Response from the reward scoring. Batch of (prompt, response, score)
tuples that pass the threshold.
<SchemaDefinition schemaRef="#/components/schemas/RewardScoringResponse" />'
name: RewardScoringResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/DoraFinetuningConfig"
/>
name: DoraFinetuningConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/LoraFinetuningConfig"
/>
name: LoraFinetuningConfig
- description: 'Request to finetune a model.
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingSFTRequest" />'
name: PostTrainingSFTRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/QLoraFinetuningConfig"
/>
name: QLoraFinetuningConfig
# Specification extension that partitions the declared tag names into two
# named groups. List order is preserved exactly as published.
x-tagGroups:
  # The seven name-only tags.
  - name: Operations
    tags:
      - AgenticSystem
      - Datasets
      - Inference
      - MemoryBanks
      - PostTraining
      - RewardScoring
      - SyntheticDataGeneration
  # The tags named after component schemas.
  - name: Types
    tags:
      - AgenticSystemCreateRequest
      - AgenticSystemCreateResponse
      - AgenticSystemExecuteRequest
      - AgenticSystemExecuteResponse
      - AgenticSystemExecuteResponseStreamChunk
      - AgenticSystemTurn
      - Attachment
      - BatchChatCompletionRequest
      - BatchCompletionRequest
      - ChatCompletionRequest
      - ChatCompletionResponse
      - ChatCompletionResponseStreamChunk
      - CompletionRequest
      - CompletionResponse
      - CompletionResponseStreamChunk
      - CreateDatasetRequest
      - DPOAlignmentConfig
      - Dataset
      - Dialog
      - DoraFinetuningConfig
      - KScoredPromptGenerations
      - LoraFinetuningConfig
      - MemoryBank
      - MemoryBankDocument
      - Message
      - OptimizerConfig
      - PostTrainingJobArtifactsResponse
      - PostTrainingJobLogStream
      - PostTrainingJobStatusResponse
      - PostTrainingRLHFRequest
      - PostTrainingSFTRequest
      - QLoraFinetuningConfig
      - RewardScoringRequest
      - RewardScoringResponse
      - ScoredMessage
      - ShieldConfig
      - SyntheticDataGenerationRequest
      - SyntheticDataGenerationResponse
      - TrainingConfig
      - URL