This commit is contained in:
Ashwin Bharambe 2024-07-11 10:04:56 -07:00
parent 6d6c07b882
commit f6b2b2fb39
13 changed files with 20 additions and 2286 deletions

View file

@ -2,9 +2,26 @@ This repo contains the API specifications for various parts of the Llama Stack.
The Stack consists of toolchain-apis and agentic-apis.
The toolchain APIs that are covered --
- chat_completion
- batch inference
- fine tuning
- inference / batch inference
- post training
- reward model scoring
- synthetic data generation
### Generate OpenAPI specs
Set up virtual environment
```
python3.9 -m venv ~/.venv/toolchain/
source ~/.venv/toolchain/bin/activate
with-proxy pip3 install -r requirements.txt
```
Run the generate.sh script
```
cd source && sh generate.sh
```

View file

@ -1,167 +0,0 @@
# OpenAPI 3.0 description of the Batch Inference API.
# Two operations: submit a batch inference job, and poll its status by job ID.
openapi: 3.0.0
info:
  title: Batch Inference API
  version: 0.0.1
paths:
  /batch_inference/submit_job:
    post:
      summary: Submit a batch inference job
      description: |
        This endpoint allows clients to submit a batch inference job using a model and a prompt file.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                model:
                  type: string
                  description: "The model identifier to be used for inference."
                prompt_file_path:
                  # OpenAPI 3.0 ignores any keyword placed next to $ref, so the
                  # reference is wrapped in allOf to keep the description.
                  allOf:
                    - $ref: '#/components/schemas/Path'
                  description: "Path to a JSONL file where each line is a JSON-encoded list of messages."
                options:
                  $ref: '#/components/schemas/Options'
                num_generations:
                  type: integer
                  description: "Number of generations to produce."
      responses:
        '200':
          description: Batch inference job successfully submitted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchInference'
  /batch_inference/job_status:
    get:
      summary: Get status for an already submitted job
      description: |
        Retrieve the status and details of a previously submitted batch inference job using its unique job ID.
      parameters:
        - in: query
          name: job_id
          schema:
            type: string
          required: true
          description: "Unique identifier for the batch inference job."
      responses:
        '200':
          description: Batch inference job status retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchInference'
components:
  schemas:
    # A single message; each line of the prompt file is a JSON-encoded list of these.
    Message:
      type: object
      properties:
        role:
          type: string
        text:
          type: string
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/MediaAttachment'
        eot:
          type: boolean
          description: "End of transmission flag."
        tool_call:
          type: boolean
          description: "Indicates if it's a tool call - builtin, custom, or ipython."
        is_complete:
          type: boolean
          description: "For streaming, indicates if the message is complete."
        is_header_complete:
          type: boolean
          description: "For streaming, indicates if the header of the message is complete."
        metadata:
          type: object
          additionalProperties: true
          description: "Additional metadata as JSON."
    MediaAttachment:
      type: object
      properties:
        attachment_type:
          $ref: '#/components/schemas/MediaAttachmentType'
        data_type:
          $ref: '#/components/schemas/MediaAttachmentDataType'
        data:
          type: string
    MediaAttachmentType:
      type: string
      enum:
        - image
        - video
        - audio
        - text
      description: "Type of media attachment."
    MediaAttachmentDataType:
      type: string
      enum:
        - raw_bytes
        - filepath
        - uri
      description: "Data type of the media attachment."
    # Job record returned by both the submit and the status operations.
    BatchInference:
      type: object
      properties:
        job_id:
          type: string
          description: "ID provided by the API for the job."
        created:
          type: string
          format: date-time
          description: "Timestamp when the job was created."
        status:
          type: string
          enum:
            - validating
            - running
            - completed
            - failed
          description: "Current status of the job."
        input_file_path:
          $ref: '#/components/schemas/Path'
        success_file_path:
          $ref: '#/components/schemas/Path'
        error_file_path:
          $ref: '#/components/schemas/Path'
        metadata:
          type: object
          additionalProperties: true
          description: "Additional metadata related to the job."
    # Sampling options forwarded to the model.
    Options:
      type: object
      properties:
        logprobs:
          type: boolean
        max_tokens:
          type: integer
        temperature:
          type: number
        top_p:
          type: number
    # A value plus a tag saying how to interpret it (raw bytes, file path or URI).
    Path:
      type: object
      properties:
        value:
          type: string
          description: "The path value."
        type:
          type: string
          enum:
            - raw_bytes
            - filepath
            - uri
          description: "Data Type of the path."

View file

@ -1,140 +0,0 @@
# OpenAPI 3.0 description of the Chat Completion API.
# One POST operation: send a list of messages, receive candidate completions.
openapi: 3.0.0
info:
  title: Chat Completion API
  version: 0.0.1
paths:
  /chat_completion/:
    post:
      summary: Submit a chat completion request
      description: |
        This endpoint allows clients to submit a chat completion request.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                messages:
                  type: array
                  items:
                    $ref: '#/components/schemas/Message'
                model:
                  type: string
                options:
                  $ref: '#/components/schemas/Options'
                n_completions:
                  type: integer
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                type: object
                properties:
                  id:
                    type: string
                  candidates:
                    type: array
                    items:
                      $ref: '#/components/schemas/Completion'
                  model_called:
                    type: string
                  usage:
                    $ref: '#/components/schemas/TokenUsage'
components:
  schemas:
    # One turn of a conversation, with optional media attachments and streaming flags.
    Message:
      type: object
      properties:
        role:
          type: string
        text:
          type: string
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/MediaAttachment'
        eot:
          type: boolean
          description: 'End of transmission flag.'
        tool_call:
          type: boolean
          description: "Indicates if it's a tool call - builtin, custom, or ipython."
        is_complete:
          type: boolean
          description: 'For streaming, indicates if the message is complete.'
        is_header_complete:
          type: boolean
          description: 'For streaming, indicates if the header of the message is complete.'
        metadata:
          type: object
          additionalProperties: true
          description: 'Additional metadata as JSON.'
    MediaAttachment:
      type: object
      properties:
        attachment_type:
          $ref: '#/components/schemas/MediaAttachmentType'
        data_type:
          $ref: '#/components/schemas/MediaAttachmentDataType'
        data:
          type: string
    MediaAttachmentType:
      type: string
      enum: [image, video, audio, text]
      description: 'Type of media attachment.'
    MediaAttachmentDataType:
      type: string
      enum: [raw_bytes, filepath, uri]
      description: 'Data type of the media attachment.'
    # One candidate produced for a request.
    Completion:
      type: object
      properties:
        id:
          type: string
        message:
          $ref: '#/components/schemas/Message'
        tokens:
          type: array
          items:
            type: integer
        logprobs:
          type: array
          items:
            type: number
        finish_reason:
          type: string
          enum: [stop, safety, max-length]
          description: 'Reason for completion termination.'
    # Sampling options forwarded to the model.
    Options:
      type: object
      properties:
        logprobs:
          type: boolean
        max_tokens:
          type: integer
        temperature:
          type: number
        top_p:
          type: number
    # Token accounting for a request.
    TokenUsage:
      type: object
      properties:
        input_tokens:
          type: integer
        output_tokens:
          type: integer
        total_tokens:
          type: integer

View file

@ -1,266 +0,0 @@
# OpenAPI 3.0 description of the Fine Tuning API.
# Operations: submit a job, query its status, cancel it, and tail its logs.
openapi: 3.0.0
info:
  title: Fine Tuning API
  version: 0.0.1
  description: API for managing fine tuning jobs for machine learning models.
paths:
  /fine_tuning/jobs/submit:
    post:
      summary: Submit a fine tuning job
      description: Submit a fine tuning job with the specified configuration.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Config'
      responses:
        # Response codes are quoted: OpenAPI keys them as strings, and a bare
        # 200 would be loaded as an integer key.
        '200':
          description: Successfully submitted the fine tuning job.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FineTuningJob'
  /fine_tuning/jobs/status:
    get:
      # NOTE(review): the summary and description mention "last N" jobs, but the
      # operation takes a single job_id and returns one FineTuningJob - confirm
      # which is intended.
      summary: Gets last N fine tuning jobs
      description: Retrieve the status of the last N fine tuning jobs based on the provided job ID.
      parameters:
        - in: query
          name: job_id
          schema:
            type: string
          required: true
          description: The ID of the job to retrieve status for.
      responses:
        '200':
          description: Successfully retrieved the job status.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FineTuningJob'
  /fine_tuning/jobs/cancel:
    post:
      summary: Cancel provided job
      description: Cancel the fine tuning job with the specified job ID.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                job_id:
                  type: string
      responses:
        '200':
          description: Successfully cancelled the fine tuning job.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/FineTuningJob'
  /fine_tuning/jobs/tail:
    get:
      summary: Tail logs of a particular job
      description: Stream the logs of a particular job in real-time. This endpoint supports streaming responses.
      parameters:
        - in: query
          name: job_id
          schema:
            type: string
          required: true
          description: The ID of the job to tail logs for.
      responses:
        '200':
          description: Streaming logs in real-time.
          content:
            # The media type key below already declares the Content-Type; a
            # "Content-Type" entry under headers is ignored by OpenAPI 3.0, so
            # it is not listed there.
            application/x-ndjson:
              schema:
                type: object
                properties:
                  logs:
                    type: array
                    items:
                      $ref: '#/components/schemas/Log'
          headers:
            Transfer-Encoding:
              schema:
                type: string
                default: 'chunked'
components:
  schemas:
    # keep in sync with /chat_completion
    # Defined inline so the $ref in TrainingDataItem resolves to a real schema
    # instead of an empty (null) entry.
    Message:
      type: object
      properties:
        role:
          type: string
        text:
          type: string
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/MediaAttachment'
        eot:
          type: boolean
          description: "End of transmission flag."
        tool_call:
          type: boolean
          description: "Indicates if it's a tool call - builtin, custom, or ipython."
        is_complete:
          type: boolean
          description: "For streaming, indicates if the message is complete."
        is_header_complete:
          type: boolean
          description: "For streaming, indicates if the header of the message is complete."
        metadata:
          type: object
          additionalProperties: true
          description: "Additional metadata as JSON."
    MediaAttachment:
      type: object
      properties:
        attachment_type:
          $ref: '#/components/schemas/MediaAttachmentType'
        data_type:
          $ref: '#/components/schemas/MediaAttachmentDataType'
        data:
          type: string
    MediaAttachmentType:
      type: string
      enum:
        - image
        - video
        - audio
        - text
      description: "Type of media attachment."
    MediaAttachmentDataType:
      type: string
      enum:
        - raw_bytes
        - filepath
        - uri
      description: "Data type of the media attachment."
    # One row of the training JSONL file.
    TrainingDataItem:
      type: object
      properties:
        dialog:
          type: array
          items:
            $ref: '#/components/schemas/Message'
        keep_loss:
          type: array
          items:
            type: boolean
    WandBLogger:
      type: object
      required:
        - log_type
      properties:
        # Discriminator property named by Config.logger.
        log_type:
          type: string
          enum: [wandb]
        project:
          type: string
          description: The project name in WandB where logs will be stored.
    DiskLogger:
      type: object
      required:
        - log_type
      properties:
        # Discriminator property named by Config.logger.
        log_type:
          type: string
          enum: [disk]
        filename:
          type: string
          description: The filename where logs will be stored on disk.
    FullFineTuneOptions:
      type: object
      required:
        - finetuning_type
      properties:
        # Discriminator property named by FineTuningOptions.custom_training_options.
        finetuning_type:
          type: string
          enum: [fft]
        enable_activation_checkpointing:
          type: boolean
          default: true
        memory_efficient_fsdp_wrap:
          type: boolean
          default: true
        fsdp_cpu_offload:
          type: boolean
          default: true
    LoraFineTuneOptions:
      type: object
      required:
        - finetuning_type
      properties:
        # Discriminator property named by FineTuningOptions.custom_training_options.
        finetuning_type:
          type: string
          enum: [lora]
        lora_attn_modules:
          type: array
          items:
            type: string
        apply_lora_to_mlp:
          type: boolean
          default: false
        apply_lora_to_output:
          type: boolean
          default: false
        lora_rank:
          type: integer
        lora_alpha:
          type: integer
    FineTuningOptions:
      type: object
      properties:
        n_epochs:
          type: integer
        batch_size:
          type: integer
        lr:
          type: number
          format: float
        gradient_accumulation_steps:
          type: integer
        seed:
          type: integer
        shuffle:
          type: boolean
        custom_training_options:
          oneOf:
            - $ref: '#/components/schemas/FullFineTuneOptions'
            - $ref: '#/components/schemas/LoraFineTuneOptions'
          discriminator:
            propertyName: finetuning_type
            # Explicit mapping from discriminator value to schema.
            mapping:
              fft: '#/components/schemas/FullFineTuneOptions'
              lora: '#/components/schemas/LoraFineTuneOptions'
        extras:
          # json to put other config overrides that are required by torchtune
          type: object
          additionalProperties: true
    Config:
      type: object
      properties:
        model:
          type: string
          description: The model identifier that you want to fine tune.
        data:
          type: string
          format: uri
          description: Path to the JSONL file with each row representing a TrainingDataItem.
        validation_data:
          type: string
          format: uri
          description: Path to the JSONL file used for validation metrics.
        fine_tuning_options:
          $ref: '#/components/schemas/FineTuningOptions'
        logger:
          oneOf:
            - $ref: '#/components/schemas/DiskLogger'
            - $ref: '#/components/schemas/WandBLogger'
          discriminator:
            propertyName: log_type
            # Explicit mapping from discriminator value to schema.
            mapping:
              disk: '#/components/schemas/DiskLogger'
              wandb: '#/components/schemas/WandBLogger'
        overrides:
          # eg. --nproc_per_node 4 instead of default that we need to pass through to torchrun
          # when running locally
          type: string
          description: Custom override options for the fine tuning process.
        metadata:
          type: object
          additionalProperties: true
    # Job record returned by the submit, status and cancel operations.
    FineTuningJob:
      type: object
      properties:
        job_id:
          type: string
          description: Unique identifier for the fine tuning job.
        created:
          type: string
          format: date-time
          description: The creation date and time of the job.
        finished_at:
          type: string
          format: date-time
          description: The completion date and time of the job.
        status:
          type: string
          enum: [validation, queued, running, failed, success, cancelled]
          description: The current status of the job.
        error_path:
          type: string
          format: uri
          description: Path to the error log file.
        checkpoints:
          type: array
          items:
            type: string
            format: uri
          description: List of paths to checkpoint files for various epochs.
        logs:
          type: string
          format: uri
          description: Path to the logs, either local or a WandB URI.
        input_config:
          $ref: '#/components/schemas/Config'
        metadata:
          type: object
          additionalProperties: true
    Log:
      type: object
      properties:
        message:
          type: string
          description: The log message.
        timestamp:
          type: string
          format: date-time
          description: The timestamp of the log message.

File diff suppressed because it is too large Load diff

View file

@ -1,162 +0,0 @@
# OpenAPI 3.0 description of the Reward Model Service API.
# Operations: score one prompt-response pair, submit a batch scoring job, and
# poll a batch job's status by job ID.
openapi: 3.0.0
info:
  title: Reward Model Service API
  version: 0.0.1
paths:
  /reward_model_scoring/:
    post:
      summary: Score a prompt-response pair using a reward model
      description: |
        This endpoint scores a given prompt-response pair using a specified reward model and scoring function.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                prompt_with_response:
                  type: array
                  items:
                    $ref: '#/components/schemas/Message'
                  description: "Prompt and response joined as a list of messages."
                reward_model:
                  type: string
                  description: "Identifier for the reward model to be used."
                scoring_function:
                  $ref: '#/components/schemas/ScoringFunction'
                options:
                  $ref: '#/components/schemas/Options'
      responses:
        '200':
          description: Scoring completed successfully
          content:
            application/json:
              schema:
                type: object
                properties:
                  id:
                    type: string
                  logprob:
                    type: number
                    format: float
                  score:
                    type: number
                    format: float
  /reward_model_scoring/submit_job/:
    post:
      summary: Batch scoring using reward models
      description: |
        Submit a batch job for scoring multiple prompt-response pairs using a reward model.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                reward_model:
                  type: string
                  description: "Identifier for the reward model to be used."
                prompt_with_response_path:
                  # OpenAPI 3.0 ignores any keyword placed next to $ref, so the
                  # reference is wrapped in allOf to keep the description.
                  allOf:
                    - $ref: '#/components/schemas/Path'
                  description: "Path to a JSONL file where each line is a List[Message] and custom_id."
                scoring_function:
                  $ref: '#/components/schemas/ScoringFunction'
                metadata:
                  type: object
                  additionalProperties: true
                  description: "Metadata to carry forward in the response."
      responses:
        '200':
          description: Batch scoring job successfully submitted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchRewardModelScoringJob'
  /reward_model_scoring/submit_job/job_status:
    get:
      summary: Get status for an already submitted job
      description: |
        Retrieve the status and details of a previously submitted batch scoring job using its unique job ID.
      parameters:
        - in: query
          name: job_id
          schema:
            type: string
          required: true
          description: "Unique identifier for the batch scoring job."
      responses:
        '200':
          description: Batch scoring job status retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BatchRewardModelScoringJob'
components:
  schemas:
    # reuse from /chat_completion
    # Defined inline so the $ref in prompt_with_response resolves to a real
    # schema instead of an empty (null) entry.
    Message:
      type: object
      properties:
        role:
          type: string
        text:
          type: string
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/MediaAttachment'
        eot:
          type: boolean
          description: "End of transmission flag."
        tool_call:
          type: boolean
          description: "Indicates if it's a tool call - builtin, custom, or ipython."
        is_complete:
          type: boolean
          description: "For streaming, indicates if the message is complete."
        is_header_complete:
          type: boolean
          description: "For streaming, indicates if the header of the message is complete."
        metadata:
          type: object
          additionalProperties: true
          description: "Additional metadata as JSON."
    MediaAttachment:
      type: object
      properties:
        attachment_type:
          $ref: '#/components/schemas/MediaAttachmentType'
        data_type:
          $ref: '#/components/schemas/MediaAttachmentDataType'
        data:
          type: string
    MediaAttachmentType:
      type: string
      enum:
        - image
        - video
        - audio
        - text
      description: "Type of media attachment."
    MediaAttachmentDataType:
      type: string
      enum:
        - raw_bytes
        - filepath
        - uri
      description: "Data type of the media attachment."
    Options:
      type: object
      properties:
        logprobs:
          type: boolean
        max_tokens:
          type: integer
        temperature:
          type: number
        top_p:
          type: number
      # TODO: Add/Remove more reward model specific params
    # Named scoring function plus free-form parameters.
    ScoringFunction:
      type: object
      properties:
        name:
          type: string
        params:
          type: object
          additionalProperties: true
    # A value plus a tag saying how to interpret it (raw bytes, file path or URI).
    Path:
      type: object
      properties:
        value:
          type: string
        type:
          type: string
          enum:
            - raw_bytes
            - filepath
            - uri
    # Job record returned by the batch submit and status operations.
    BatchRewardModelScoringJob:
      type: object
      properties:
        job_id:
          type: string
        created:
          type: string
          format: date-time
        status:
          type: string
          enum:
            - validating
            - running
            - completed
            - failed
        input_file_path:
          $ref: '#/components/schemas/Path'
        success_file_path:
          $ref: '#/components/schemas/Path'
        error_file_path:
          $ref: '#/components/schemas/Path'
        metadata:
          type: object
          additionalProperties: true
          description: "Metadata carried forward from the job submission."

View file

@ -1,65 +0,0 @@
== Schema ==
Message:
role: str
text: str
attachments: List[MediaAttachment]
eot: bool
tool_call: bool # if it's a tool call - builtin or custom or ipython
# for streaming
is_complete: bool
is_header_complete: bool
metadata: json
MediaAttachment:
attachment_type: MediaAttachmentType
data_type: MediaAttachmentDataType
data: str
MediaAttachmentType: # enum [image, video, audio, text(or file)]
MediaAttachmentDataType: # enum [raw_bytes, filepath, uri]
BatchInference:
job_id: str # id provided by the api
created: string # format - date-time
status: string # enum (validating, running, completed, failed)
input_file_path: Path # jsonl style file where each line is a json encoded List[Message]
success_file_path: Path
error_file_path: Path
metadata: json
Options:
logprobs: bool
max_tokens: int
temperature: float
top_p: float
Path:
value: string
type: string # enum [raw_bytes, filepath, uri]
== Callsites ==
callsite:
/batch_inference/submit_job
request_type:
post
description:
Submit a batch inference job
request:
model: str
prompt_file_path: Path # jsonl style file where each line is a json encoded List[Message]
options: Options
num_generations: int
response:
batch_inference_job: BatchInference
callsite:
/batch_inference/job_status
request_type:
get
description:
Get status for an already submitted job
request:
job_id: str # unique identifier for the job
response:
batch_inference_job: BatchInference

View file

@ -1,72 +0,0 @@
# Simple bullet form for ease of read and iteration
# Use LLMs to translate this to an OpenAPI spec.
== Schema ==
Message:
role: str
text: str
attachments: List[MediaAttachment]
eot: bool
tool_call: bool # if it's a tool call - builtin or custom or ipython
# for streaming
is_complete: bool
is_header_complete: bool
metadata: json
MediaAttachment:
attachment_type: MediaAttachmentType
data_type: MediaAttachmentDataType
data: str
MediaAttachmentType: # enum [image, video, audio, text(or file)]
MediaAttachmentDataType: # enum [raw_bytes, filepath, uri]
Completion:
id: str
message: Message
tokens: List[int]
logprobs: List[float]
finish_reason: str # Enum (stop, safety, max-length, etc)
Options:
logprobs: bool
max_tokens: int
temperature: float
top_p: float
#TODO: Get more options from metagen
TokenUsage:
input_tokens: int
output_tokens: int
total_tokens: int
== Callsite ==
callsite:
chat_completion/
request_type:
post
description:
submit a chat completion request
request:
messages: List[Message]
model: str
options: Options
n_completions: int
# TODO: how to handle tooling control if any ?
# Add `tools` and `tool_choice` --
# for eg. "auto": use model's guess
# how to force to use particular tool
# how to disable inbuilt tools
# tools: List[Tool]
# tool_choice: Any
response:
id: str
candidates: List[Completion] # a list to account for when n_completions > 1
model_called: str # info on that model that produced this result
usage: TokenUsage
# TODO
# callsite:
# chat_completion_stream/

View file

@ -1,134 +0,0 @@
# Fine Tuning APIs
== Schema ==
TrainingDataItem:
dialog: List[Message]
keep_loss: List[bool]
WandBLogger:
project: str
DiskLogger:
# log_dir will be pre-configured in environment
filename: str
FullFineTuneOptions:
enable_activation_checkpointing: True
memory_efficient_fsdp_wrap: True
fsdp_cpu_offload: True
LoraFineTuneOptions:
lora_attn_modules: ['q_proj', 'v_proj']
apply_lora_to_mlp: False
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
FineTuningOptions:
n_epochs: int
batch_size: int
lr: float
gradient_accumulation_steps: int
seed: int
shuffle: bool
# Unions in OpenAPI with a reference field that can help disambiguate
custom_training_options:
discriminator:
propertyName: fine_tuning_type
mapping:
fft: FullFineTuneOptions
lora: LoraFineTuneOptions
# other options that can be passed in
extras: json
Config:
model: str # model that you want to fine tune
data: Path # jsonl with each row representing a TrainingDataItem
validation_data: Path # same as data but to get validation metrics on
# fine tuning args
fine_tuning_options: FineTuningOptions
# metric logging
logger:
discriminator:
propertyName: log_type
mapping:
disk: DiskLogger
wandb: WandBLogger
# Override options
# eg. --nproc_per_node 4 instead of defaults,
# this might be impl specific and can allow for various customizations
overrides: str
metadata: json # to carry over to job details
FineTuningJob:
job_id: str
created: str # format date-time
finished_at: str # format date-time
status: str # enum - validation, queued, running, failed, success, cancelled
error_path: Path # error logging
checkpoints: List[Path] # checkpoints for various epochs
logs: Path # local path / wandb uri
input_config: Config # config used to submit this job
metadata: json # carried over from user provided input
Log:
message: string # The log message.
timestamp: string # format: date-time
== Callsites ==
callsite:
/fine_tuning/jobs/submit
request_type:
post
description:
Submit a fine tuning job
request:
config: Config
response:
fine_tuning_job: FineTuningJob
callsite:
/fine_tuning/jobs/status
request_type:
get
description:
Gets last N fine tuning jobs
request:
job_id: str
response:
fine_tuning_job: FineTuningJob
callsite:
/fine_tuning/jobs/cancel
request_type:
post
description:
Cancel provided job
request:
job_id: str
response:
fine_tuning_job: FineTuningJob
callsite:
/fine_tuning/jobs/tail
request_type:
get
description:
Tail logs of a particular job
request:
job_id: str
response:
logs: List[Log]
streaming:
enabled: True
chunkSize: 1024

View file

@ -1,70 +0,0 @@
# Reward Model Service
== Schema ==
Message:
# Same as /chat_completion
Options:
logprobs: bool
max_tokens: int
temperature: float
top_p: float
#TODO: Figure out what other reward model specific params
ScoringFunction:
name: str
params: json
BatchRewardModelScoringJob:
job_id: str
created: str # format date-time
status: string # enum (validating, running, completed, failed)
input_file_path: Path
success_file_path: Path # jsonl where each row has {custom_id: <from input>, logprob: <float>, score: float}
error_file_path: Path # jsonl where each row has {custom_id: <from input>, error: <error_info>}
metadata: json # carry forward from job submission api
== Callsites ==
callsite:
reward_model_scoring/
request_type:
post
description:
Score a prompt-response pair using a reward model
request:
prompt_with_response: List[Message] # prompt and response joined as a List[Message]
reward_model: str
scoring_function: ScoringFunction
options: Options
response:
id: str
logprob: float
score: float
callsite:
reward_model_scoring/submit_job/
request_type:
post
description:
Batch scoring using reward models
request:
reward_model: str
prompt_with_response_path: Path # jsonl file where each line is a List[Message] and custom_id
scoring_function: ScoringFunction
metadata: json # anything to carry forward over in the response
response:
batch_reward_model_scoring_job: BatchRewardModelScoringJob
callsite:
/reward_model_scoring/submit_job/job_status
request_type:
get
description:
Get status for an already submitted job
request:
job_id: str # unique identifier for the job
response:
batch_reward_model_scoring_job: BatchRewardModelScoringJob

View file

@ -1,58 +0,0 @@
# Synthetic Data Generation API
== Schema ==
FilteringFunction:
name: str
params: json
SyntheticDataPoint:
custom_id: str
index: int
prompt: List[Message]
response: Message
logprob: float
score: float
SyntheticDataGenerationJob:
job_id: str # id provided by the api
created: string # format - date-time
status: string # enum (validating, running, completed, failed)
input_file_path: Path # jsonl style file where each row contains custom_id and message_list
success_file_path: Path # jsonl each line is SyntheticDataPoint
error_file_path: Path # custom_ids where we failed with some info
metadata: json
== Callsites ==
callsite:
/synthetic_data_gen/submit_job
request_type:
post
description:
Submit a job to generate synthetic data using llm + reward model scoring + filtering
request:
# batch inference params
model: str
prompt_file_path: Path # jsonl style file where each line is a json encoded List[Message] + custom_id
options: Options
num_generations: int
# reward model scoring params
reward_model: str
scoring_function: ScoringFunction
# filtering params
filtering_function: FilteringFunction
metadata: json
response:
synth_data_gen_job: SyntheticDataGenerationJob
callsite:
/synthetic_data_gen/job_status
request_type:
get
description:
Get status for an already submitted job
request:
job_id: str # unique identifier for the job
response:
synth_data_gen_job: SyntheticDataGenerationJob

View file

@ -1,131 +0,0 @@
# OpenAPI 3.0 description of the Synthetic Data Generation API.
# Operations: submit a generation job (inference + reward scoring + filtering)
# and poll its status by job ID.
openapi: 3.0.0
info:
  title: Synthetic Data Generation API
  version: 0.0.1
paths:
  /synthetic_data_gen/submit_job:
    post:
      summary: Submit a job to generate synthetic data
      description: Submit a job to generate synthetic data using llm + reward model scoring + filtering
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                model:
                  type: string
                  description: Model used for batch inference
                # NOTE(review): file paths are plain strings with a non-standard
                # "path" format here, while the batch inference spec models them
                # as a Path object - confirm which is intended.
                prompt_file_path:
                  type: string
                  format: path
                  description: Path to the JSONL file containing message_lists and custom IDs
                options:
                  $ref: '#/components/schemas/Options'
                num_generations:
                  type: integer
                  description: Number of generations to produce
                reward_model:
                  type: string
                  description: Model used for scoring
                scoring_function:
                  $ref: '#/components/schemas/ScoringFunction'
                filtering_function:
                  $ref: '#/components/schemas/FilteringFunction'
                metadata:
                  type: object
                  additionalProperties: true
                  description: Additional metadata for the job
      responses:
        '200':
          description: Job successfully submitted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SyntheticDataGenerationJob'
  /synthetic_data_gen/job_status:
    get:
      summary: Get job status
      description: Get status for an already submitted job
      parameters:
        - in: query
          name: job_id
          schema:
            type: string
          required: true
          description: Unique identifier for the job
      responses:
        '200':
          description: Job status retrieved successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SyntheticDataGenerationJob'
components:
  schemas:
    # Message, MediaAttachment*, Options and ScoringFunction are referenced by
    # this document and must exist here for those $refs to resolve. Their
    # definitions mirror the chat completion and reward model specs.
    Message:
      type: object
      properties:
        role:
          type: string
        text:
          type: string
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/MediaAttachment'
        eot:
          type: boolean
          description: "End of transmission flag."
        tool_call:
          type: boolean
          description: "Indicates if it's a tool call - builtin, custom, or ipython."
        is_complete:
          type: boolean
          description: "For streaming, indicates if the message is complete."
        is_header_complete:
          type: boolean
          description: "For streaming, indicates if the header of the message is complete."
        metadata:
          type: object
          additionalProperties: true
          description: "Additional metadata as JSON."
    MediaAttachment:
      type: object
      properties:
        attachment_type:
          $ref: '#/components/schemas/MediaAttachmentType'
        data_type:
          $ref: '#/components/schemas/MediaAttachmentDataType'
        data:
          type: string
    MediaAttachmentType:
      type: string
      enum:
        - image
        - video
        - audio
        - text
      description: "Type of media attachment."
    MediaAttachmentDataType:
      type: string
      enum:
        - raw_bytes
        - filepath
        - uri
      description: "Data type of the media attachment."
    Options:
      type: object
      properties:
        logprobs:
          type: boolean
        max_tokens:
          type: integer
        temperature:
          type: number
        top_p:
          type: number
    ScoringFunction:
      type: object
      properties:
        name:
          type: string
        params:
          type: object
          additionalProperties: true
    FilteringFunction:
      type: object
      properties:
        name:
          type: string
          description: Name of the filtering function
        params:
          type: object
          additionalProperties: true
          description: JSON object containing parameters for the filtering function
    # Shape of one generated sample; not referenced by any operation in this document.
    SyntheticDataPoint:
      type: object
      properties:
        custom_id:
          type: string
          description: Custom identifier for the data point
        index:
          type: integer
          description: Index of the data point
        prompt:
          type: array
          items:
            $ref: '#/components/schemas/Message'
          description: List of messages used as prompt
        response:
          $ref: '#/components/schemas/Message'
        logprob:
          type: number
          format: float
          description: Log probability of the response
        score:
          type: number
          format: float
          description: Score of the response based on the reward model
    # Job record returned by the submit and status operations.
    SyntheticDataGenerationJob:
      type: object
      properties:
        job_id:
          type: string
          description: ID provided by the API
        created:
          type: string
          format: date-time
          description: Timestamp when the job was created
        status:
          type: string
          enum: [validating, running, completed, failed]
          description: Current status of the job
        input_file_path:
          type: string
          format: path
          description: Path to the input JSONL file
        success_file_path:
          type: string
          format: path
          description: Path to the JSONL file containing successful results
        error_file_path:
          type: string
          format: path
          description: Path to the JSONL file containing errors
        metadata:
          type: object
          additionalProperties: true
          description: Additional metadata about the job