finetuning

This commit is contained in:
Ashwin Bharambe 2024-07-10 20:47:05 -07:00
parent 956f07b04c
commit 69ecf55de2
5 changed files with 1334 additions and 28 deletions

View file

@ -172,13 +172,20 @@ components:
tool_response_delta:
additionalProperties: false
properties:
response:
type: string
content:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
- items:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
type: array
tool_name:
type: string
required:
- tool_name
- response
- content
type: object
violation:
additionalProperties: false
@ -274,13 +281,20 @@ components:
items:
additionalProperties: false
properties:
response:
type: string
content:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
- items:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
type: array
tool_name:
type: string
required:
- tool_name
- response
- content
type: object
type: array
uuid:
@ -690,6 +704,224 @@ components:
- text_delta
title: streamed completion response.
type: object
# Request body schema referenced by POST /datasets/create.
# Carries a caller-supplied `uuid` string together with the `dataset`
# description (see the Dataset schema); both are required and no other
# properties are accepted.
CreateDatasetRequest:
additionalProperties: false
properties:
dataset:
$ref: '#/components/schemas/Dataset'
uuid:
type: string
required:
- uuid
- dataset
title: Request to create a dataset.
type: object
# Describes a dataset: its column typing, where its content lives, and
# free-form metadata. All three properties are required.
Dataset:
additionalProperties: false
properties:
# Map from an arbitrary key (presumably the column name — confirm) to one of
# the column kinds listed in the enum.
columns:
additionalProperties:
enum:
- dialog
- text
- media
- number
- json
type: string
type: object
# Location of the dataset content, constrained by the shared URL schema.
content_url:
$ref: '#/components/schemas/URL'
# Open-ended key/value map; each value may be any JSON type.
metadata:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
required:
- columns
- content_url
- metadata
title: Dataset to be used for training or evaluating language models.
type: object
# Response schema for GET /finetuning/job/logs: the job identifier plus an
# array of log lines, each a plain string. Both properties are required.
FinetuningJobLogStream:
additionalProperties: false
properties:
job_uuid:
type: string
log_lines:
items:
type: string
type: array
required:
- job_uuid
- log_lines
title: Stream of logs from a finetuning job.
type: object
# Response schema for GET /finetuning/job/status.
# Only `job_uuid` and `status` are required; the three timestamps and
# `resources_allocated` are optional.
FinetuningJobStatusResponse:
additionalProperties: false
properties:
completed_at:
format: date-time
type: string
job_uuid:
type: string
# Open-ended key/value map; each value may be any JSON type.
resources_allocated:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
scheduled_at:
format: date-time
type: string
started_at:
format: date-time
type: string
# Job state; restricted to the four values below.
status:
enum:
- running
- completed
- failed
- scheduled
type: string
required:
- job_uuid
- status
title: Status of a finetuning job.
type: object
# Request body schema referenced by POST /finetuning/text_generation/train.
# Every property listed under `required` must be present and no additional
# properties are accepted.
FinetuningTrainRequest:
additionalProperties: false
properties:
# Finetuning method; restricted to the four values below.
algorithm:
enum:
- full
- lora
- qlora
- dora
type: string
# Previously a `oneOf` over the LoraFinetuningConfig ref and two inline
# schemas that were exact copies of it. `oneOf` requires exactly one
# alternative to match, so any well-formed config matched all three and
# failed validation. A single reference accepts the same shape.
# NOTE(review): if QLoRA/DoRA need their own fields later, add distinct named
# schemas rather than duplicates. Also confirm what callers should send here
# when `algorithm` is `full`, since this property is required.
algorithm_config:
$ref: '#/components/schemas/LoraFinetuningConfig'
dataset:
$ref: '#/components/schemas/Dataset'
# Open-ended key/value map; each value may be any JSON type.
hyperparam_search_config:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
job_uuid:
type: string
# Open-ended key/value map; each value may be any JSON type.
logger_config:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
# Base model to finetune; restricted to the two values below.
model:
enum:
- llama3_8b
- llama3_70b
type: string
optimizer_config:
$ref: '#/components/schemas/OptimizerConfig'
training_config:
$ref: '#/components/schemas/TrainingConfig'
validation_dataset:
$ref: '#/components/schemas/Dataset'
required:
- job_uuid
- model
- dataset
- validation_dataset
- algorithm
- algorithm_config
- optimizer_config
- training_config
- hyperparam_search_config
- logger_config
title: Request to finetune a model.
type: object
# Algorithm configuration object referenced from FinetuningTrainRequest's
# `algorithm_config`. All five properties are required and no additional
# properties are accepted.
# NOTE(review): `rank` and `alpha` are presumably the usual LoRA rank and
# scaling hyperparameters — confirm against the trainer implementation.
LoraFinetuningConfig:
additionalProperties: false
properties:
alpha:
type: integer
apply_lora_to_mlp:
type: boolean
apply_lora_to_output:
type: boolean
lora_attn_modules:
items:
type: string
type: array
rank:
type: integer
required:
- lora_attn_modules
- apply_lora_to_mlp
- apply_lora_to_output
- rank
- alpha
type: object
Message:
additionalProperties: false
properties:
@ -735,13 +967,20 @@ components:
items:
additionalProperties: false
properties:
response:
type: string
content:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
- items:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
type: array
tool_name:
type: string
required:
- tool_name
- response
- content
type: object
type: array
required:
@ -750,6 +989,27 @@ components:
- tool_calls
- tool_responses
type: object
# Optimizer settings referenced from FinetuningTrainRequest's
# `optimizer_config`. All four properties are required.
OptimizerConfig:
additionalProperties: false
properties:
lr:
type: number
lr_min:
type: number
# Optimizer choice; restricted to the three values below.
optimizer_type:
enum:
- adam
- adamw
- sgd
type: string
weight_decay:
type: number
required:
- optimizer_type
- lr
- lr_min
- weight_decay
type: object
RewardScoringRequest:
additionalProperties: false
properties:
@ -885,6 +1145,32 @@ components:
title: Response from the synthetic data generation. Batch of (prompt, response,
score) tuples that pass the threshold.
type: object
# Training-loop settings referenced from FinetuningTrainRequest's
# `training_config`. All seven properties are required.
# NOTE(review): both `n_epochs` and `n_iters` are mandatory integers; how the
# two interact is not stated in this schema — confirm with the trainer.
TrainingConfig:
additionalProperties: false
properties:
batch_size:
type: integer
enable_activation_checkpointing:
type: boolean
fsdp_cpu_offload:
type: boolean
memory_efficient_fsdp_wrap:
type: boolean
n_epochs:
type: integer
n_iters:
type: integer
shuffle:
type: boolean
required:
- n_epochs
- batch_size
- shuffle
- n_iters
- enable_activation_checkpointing
- memory_efficient_fsdp_wrap
- fsdp_cpu_offload
type: object
URL:
format: uri
pattern: ^(https?://|file://|data:)
@ -989,6 +1275,98 @@ paths:
description: Normal completion response. **OR** streamed completion response.
tags:
- Inference
# POST endpoint that takes a required JSON body matching
# CreateDatasetRequest. The only documented response is a 200 with no body
# schema.
/datasets/create:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateDatasetRequest'
required: true
responses:
'200':
description: OK
tags:
- Datasets
# DELETE endpoint keyed by a required `dataset_id` query-string parameter.
# The only documented response is a 200 with no body schema.
# NOTE(review): CreateDatasetRequest names its identifier `uuid` while this
# operation takes `dataset_id` — confirm both refer to the same value.
/datasets/delete:
delete:
parameters:
- in: query
name: dataset_id
required: true
schema:
type: string
responses:
'200':
description: OK
tags:
- Datasets
# GET endpoint keyed by a required `dataset_id` query-string parameter; a 200
# response returns a Dataset object as JSON.
# NOTE(review): CreateDatasetRequest names its identifier `uuid` while this
# operation takes `dataset_id` — confirm both refer to the same value.
/datasets/get:
get:
parameters:
- in: query
name: dataset_id
required: true
schema:
type: string
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/Dataset'
description: OK
tags:
- Datasets
# GET endpoint keyed by a required `job_uuid` query-string parameter; a 200
# response returns a FinetuningJobLogStream object as JSON.
# NOTE(review): the schema title says "Stream", but the response is declared
# as a single application/json document — confirm whether streaming is
# intended.
/finetuning/job/logs:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/FinetuningJobLogStream'
description: OK
tags:
- Finetuning
# GET endpoint keyed by a required `job_uuid` query-string parameter; a 200
# response returns a FinetuningJobStatusResponse object as JSON.
/finetuning/job/status:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/FinetuningJobStatusResponse'
description: OK
tags:
- Finetuning
# POST endpoint that takes a required JSON body matching
# FinetuningTrainRequest. The only documented response is a 200 with no body
# schema.
/finetuning/text_generation/train:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/FinetuningTrainRequest'
required: true
responses:
'200':
description: OK
tags:
- Finetuning
/reward_scoring/score:
post:
parameters: []
@ -1030,10 +1408,12 @@ security:
servers:
- url: http://llama.meta.com
tags:
- name: RewardScoring
- name: Inference
- name: SyntheticDataGeneration
- name: RewardScoring
- name: AgenticSystem
- name: Finetuning
- name: Inference
- name: Datasets
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
name: ShieldConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
@ -1072,6 +1452,27 @@ tags:
<SchemaDefinition schemaRef="#/components/schemas/AgenticSystemExecuteResponseStreamChunk"
/>'
name: AgenticSystemExecuteResponseStreamChunk
- description: 'Request to create a dataset.
<SchemaDefinition schemaRef="#/components/schemas/CreateDatasetRequest" />'
name: CreateDatasetRequest
- description: 'Dataset to be used for training or evaluating language models.
<SchemaDefinition schemaRef="#/components/schemas/Dataset" />'
name: Dataset
- description: 'Status of a finetuning job.
<SchemaDefinition schemaRef="#/components/schemas/FinetuningJobStatusResponse"
/>'
name: FinetuningJobStatusResponse
- description: 'Stream of logs from a finetuning job.
<SchemaDefinition schemaRef="#/components/schemas/FinetuningJobLogStream" />'
name: FinetuningJobLogStream
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
/>
name: ChatCompletionRequest
@ -1127,10 +1528,25 @@ tags:
<SchemaDefinition schemaRef="#/components/schemas/RewardScoringResponse" />'
name: RewardScoringResponse
- description: 'Request to finetune a model.
<SchemaDefinition schemaRef="#/components/schemas/FinetuningTrainRequest" />'
name: FinetuningTrainRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/LoraFinetuningConfig"
/>
name: LoraFinetuningConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/OptimizerConfig"
/>
name: OptimizerConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/TrainingConfig" />
name: TrainingConfig
x-tagGroups:
- name: Operations
tags:
- AgenticSystem
- Datasets
- Finetuning
- Inference
- RewardScoring
- SyntheticDataGeneration
@ -1149,10 +1565,18 @@ x-tagGroups:
- CompletionRequest
- CompletionResponse
- CompletionResponseStreamChunk
- CreateDatasetRequest
- Dataset
- FinetuningJobLogStream
- FinetuningJobStatusResponse
- FinetuningTrainRequest
- LoraFinetuningConfig
- Message
- OptimizerConfig
- RewardScoringRequest
- RewardScoringResponse
- ShieldConfig
- SyntheticDataGenerationRequest
- SyntheticDataGenerationResponse
- TrainingConfig
- URL