added DPO

This commit is contained in:
Ashwin Bharambe 2024-07-11 00:01:58 -07:00
parent 7cade3acc3
commit 631328f556
4 changed files with 796 additions and 472 deletions

View file

@ -12,19 +12,6 @@ from agentic_system_types import (
SafetyViolation,
)
from finetuning_types import (
Checkpoint,
Dataset,
DoraFinetuningConfig,
FinetuningAlgorithm,
FinetuningJobLogStream,
FinetuningJobStatus,
LoraFinetuningConfig,
OptimizerConfig,
QLoraFinetuningConfig,
TrainingConfig,
)
from model_types import (
BuiltinTool,
Content,
@ -42,6 +29,21 @@ from model_types import (
URL,
)
from post_training_types import (
Checkpoint,
Dataset,
DoraFinetuningConfig,
DPOAlignmentConfig,
FinetuningAlgorithm,
LoraFinetuningConfig,
OptimizerConfig,
PostTrainingJobLogStream,
PostTrainingJobStatus,
QLoraFinetuningConfig,
RLHFAlgorithm,
TrainingConfig,
)
from pyopenapi import Info, Options, Server, Specification, webmethod
from strong_typing.schema import json_schema_type
@ -408,7 +410,7 @@ class Datasets(Protocol):
@json_schema_type
@dataclass
class FinetuningTrainRequest:
class PostTrainingSFTRequest:
"""Request to finetune a model."""
job_uuid: str
@ -432,11 +434,34 @@ class FinetuningTrainRequest:
@json_schema_type
@dataclass
class FinetuningJobStatusResponse:
class PostTrainingRLHFRequest:
    """Request to finetune a model."""

    # Request payload taken by `post_preference_optimize`. Every field below is
    # required: none declares a default.
    # NOTE(review): the docstring above is identical to the SFT request's and is
    # emitted verbatim as this schema's "title" in the generated OpenAPI spec,
    # so it is deliberately left untouched here; reword it only together with a
    # regeneration of the spec files.

    # Job identifier; presumably the same job_uuid the /post_training/job/*
    # endpoints accept -- confirm against the implementation.
    job_uuid: str
    # NOTE(review): by its name this looks like the location of an already
    # finetuned model that preference optimization starts from -- confirm.
    finetuned_model: URL
    dataset: Dataset
    validation_dataset: Dataset
    algorithm: RLHFAlgorithm
    # A single-member Union collapses to its member, so this is currently just
    # DPOAlignmentConfig (the spec shows a plain $ref, not a oneOf).
    # NOTE(review): presumably kept as a Union so more configs can be added.
    algorithm_config: Union[DPOAlignmentConfig]
    optimizer_config: OptimizerConfig
    training_config: TrainingConfig
    # TODO: define these
    # Until then both are free-form string-keyed mappings, but still required.
    hyperparam_search_config: Dict[str, Any]
    logger_config: Dict[str, Any]
@json_schema_type
@dataclass
class PostTrainingJobStatusResponse:
"""Status of a finetuning job."""
job_uuid: str
status: FinetuningJobStatus
status: PostTrainingJobStatus
scheduled_at: Optional[datetime] = None
started_at: Optional[datetime] = None
@ -449,7 +474,7 @@ class FinetuningJobStatusResponse:
@json_schema_type
@dataclass
class FinetuningJobArtifactsResponse:
class PostTrainingJobArtifactsResponse:
"""Artifacts of a finetuning job."""
job_uuid: str
@ -458,27 +483,35 @@ class FinetuningJobArtifactsResponse:
# TODO(ashwin): metrics, evals
class Finetuning(Protocol):
@webmethod(route="/finetuning/text_generation/train")
def post_train(
class PostTraining(Protocol):
@webmethod(route="/post_training/supervised_fine_tune/")
def post_supervised_fine_tune(
self,
request: FinetuningTrainRequest,
request: PostTrainingSFTRequest,
) -> None: ...
@webmethod(route="/post_training/preference_optimize/")
def post_preference_optimize(
self,
request: PostTrainingRLHFRequest,
) -> None: ...
# sends SSE stream of logs
@webmethod(route="/finetuning/job/logs")
def get_training_log_stream(self, job_uuid: str) -> FinetuningJobLogStream: ...
@webmethod(route="/post_training/job/logs")
def get_training_log_stream(self, job_uuid: str) -> PostTrainingJobLogStream: ...
@webmethod(route="/finetuning/job/status")
def get_training_job_status(self, job_uuid: str) -> FinetuningJobStatusResponse: ...
@webmethod(route="/post_training/job/status")
def get_training_job_status(
self, job_uuid: str
) -> PostTrainingJobStatusResponse: ...
@webmethod(route="/finetuning/job/cancel")
@webmethod(route="/post_training/job/cancel")
def cancel_training_job(self, job_uuid: str) -> None: ...
@webmethod(route="/finetuning/job/artifacts")
@webmethod(route="/post_training/job/artifacts")
def get_training_job_artifacts(
self, job_uuid: str
) -> FinetuningJobArtifactsResponse: ...
) -> PostTrainingJobArtifactsResponse: ...
class LlamaStackEndpoints(
@ -487,7 +520,7 @@ class LlamaStackEndpoints(
RewardScoring,
SyntheticDataGeneration,
Datasets,
Finetuning,
PostTraining,
MemoryBanks,
): ...

View file

@ -299,7 +299,7 @@
"parameters": []
}
},
"/finetuning/job/artifacts": {
"/post_training/job/artifacts": {
"get": {
"responses": {
"200": {
@ -307,14 +307,14 @@
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/FinetuningJobArtifactsResponse"
"$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
}
}
}
}
},
"tags": [
"Finetuning"
"PostTraining"
],
"parameters": [
{
@ -328,7 +328,7 @@
]
}
},
"/finetuning/job/status": {
"/post_training/job/status": {
"get": {
"responses": {
"200": {
@ -336,14 +336,14 @@
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/FinetuningJobStatusResponse"
"$ref": "#/components/schemas/PostTrainingJobStatusResponse"
}
}
}
}
},
"tags": [
"Finetuning"
"PostTraining"
],
"parameters": [
{
@ -357,7 +357,7 @@
]
}
},
"/finetuning/job/logs": {
"/post_training/job/logs": {
"get": {
"responses": {
"200": {
@ -365,14 +365,14 @@
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/FinetuningJobLogStream"
"$ref": "#/components/schemas/PostTrainingJobLogStream"
}
}
}
}
},
"tags": [
"Finetuning"
"PostTraining"
],
"parameters": [
{
@ -664,6 +664,29 @@
}
}
},
"/post_training/preference_optimize/": {
"post": {
"responses": {
"200": {
"description": "OK"
}
},
"tags": [
"PostTraining"
],
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/PostTrainingRLHFRequest"
}
}
},
"required": true
}
}
},
"/reward_scoring/score": {
"post": {
"responses": {
@ -694,7 +717,7 @@
}
}
},
"/finetuning/text_generation/train": {
"/post_training/supervised_fine_tune/": {
"post": {
"responses": {
"200": {
@ -702,14 +725,14 @@
}
},
"tags": [
"Finetuning"
"PostTraining"
],
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/FinetuningTrainRequest"
"$ref": "#/components/schemas/PostTrainingSFTRequest"
}
}
},
@ -1697,7 +1720,7 @@
"name"
]
},
"FinetuningJobArtifactsResponse": {
"PostTrainingJobArtifactsResponse": {
"type": "object",
"properties": {
"job_uuid": {
@ -1730,7 +1753,7 @@
],
"title": "Artifacts of a finetuning job."
},
"FinetuningJobStatusResponse": {
"PostTrainingJobStatusResponse": {
"type": "object",
"properties": {
"job_uuid": {
@ -1810,7 +1833,7 @@
],
"title": "Status of a finetuning job."
},
"FinetuningJobLogStream": {
"PostTrainingJobLogStream": {
"type": "object",
"properties": {
"job_uuid": {
@ -2672,6 +2695,191 @@
],
"title": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
},
"DPOAlignmentConfig": {
"type": "object",
"properties": {
"reward_scale": {
"type": "number"
},
"reward_clip": {
"type": "number"
},
"epsilon": {
"type": "number"
},
"gamma": {
"type": "number"
}
},
"additionalProperties": false,
"required": [
"reward_scale",
"reward_clip",
"epsilon",
"gamma"
]
},
"OptimizerConfig": {
"type": "object",
"properties": {
"optimizer_type": {
"type": "string",
"enum": [
"adam",
"adamw",
"sgd"
]
},
"lr": {
"type": "number"
},
"lr_min": {
"type": "number"
},
"weight_decay": {
"type": "number"
}
},
"additionalProperties": false,
"required": [
"optimizer_type",
"lr",
"lr_min",
"weight_decay"
]
},
"PostTrainingRLHFRequest": {
"type": "object",
"properties": {
"job_uuid": {
"type": "string"
},
"finetuned_model": {
"$ref": "#/components/schemas/URL"
},
"dataset": {
"$ref": "#/components/schemas/Dataset"
},
"validation_dataset": {
"$ref": "#/components/schemas/Dataset"
},
"algorithm": {
"type": "string",
"enum": [
"dpo"
]
},
"algorithm_config": {
"$ref": "#/components/schemas/DPOAlignmentConfig"
},
"optimizer_config": {
"$ref": "#/components/schemas/OptimizerConfig"
},
"training_config": {
"$ref": "#/components/schemas/TrainingConfig"
},
"hyperparam_search_config": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"logger_config": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"job_uuid",
"finetuned_model",
"dataset",
"validation_dataset",
"algorithm",
"algorithm_config",
"optimizer_config",
"training_config",
"hyperparam_search_config",
"logger_config"
],
"title": "Request to finetune a model."
},
"TrainingConfig": {
"type": "object",
"properties": {
"n_epochs": {
"type": "integer"
},
"batch_size": {
"type": "integer"
},
"shuffle": {
"type": "boolean"
},
"n_iters": {
"type": "integer"
},
"enable_activation_checkpointing": {
"type": "boolean"
},
"memory_efficient_fsdp_wrap": {
"type": "boolean"
},
"fsdp_cpu_offload": {
"type": "boolean"
}
},
"additionalProperties": false,
"required": [
"n_epochs",
"batch_size",
"shuffle",
"n_iters",
"enable_activation_checkpointing",
"memory_efficient_fsdp_wrap",
"fsdp_cpu_offload"
]
},
"RewardScoringRequest": {
"type": "object",
"properties": {
@ -2727,7 +2935,69 @@
],
"title": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold."
},
"FinetuningTrainRequest": {
"DoraFinetuningConfig": {
"type": "object",
"properties": {
"lora_attn_modules": {
"type": "array",
"items": {
"type": "string"
}
},
"apply_lora_to_mlp": {
"type": "boolean"
},
"apply_lora_to_output": {
"type": "boolean"
},
"rank": {
"type": "integer"
},
"alpha": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"lora_attn_modules",
"apply_lora_to_mlp",
"apply_lora_to_output",
"rank",
"alpha"
]
},
"LoraFinetuningConfig": {
"type": "object",
"properties": {
"lora_attn_modules": {
"type": "array",
"items": {
"type": "string"
}
},
"apply_lora_to_mlp": {
"type": "boolean"
},
"apply_lora_to_output": {
"type": "boolean"
},
"rank": {
"type": "integer"
},
"alpha": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"lora_attn_modules",
"apply_lora_to_mlp",
"apply_lora_to_output",
"rank",
"alpha"
]
},
"PostTrainingSFTRequest": {
"type": "object",
"properties": {
"job_uuid": {
@ -2761,66 +3031,10 @@
"$ref": "#/components/schemas/LoraFinetuningConfig"
},
{
"type": "object",
"properties": {
"lora_attn_modules": {
"type": "array",
"items": {
"type": "string"
}
},
"apply_lora_to_mlp": {
"type": "boolean"
},
"apply_lora_to_output": {
"type": "boolean"
},
"rank": {
"type": "integer"
},
"alpha": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"lora_attn_modules",
"apply_lora_to_mlp",
"apply_lora_to_output",
"rank",
"alpha"
]
"$ref": "#/components/schemas/QLoraFinetuningConfig"
},
{
"type": "object",
"properties": {
"lora_attn_modules": {
"type": "array",
"items": {
"type": "string"
}
},
"apply_lora_to_mlp": {
"type": "boolean"
},
"apply_lora_to_output": {
"type": "boolean"
},
"rank": {
"type": "integer"
},
"alpha": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"lora_attn_modules",
"apply_lora_to_mlp",
"apply_lora_to_output",
"rank",
"alpha"
]
"$ref": "#/components/schemas/DoraFinetuningConfig"
}
]
},
@ -2896,7 +3110,7 @@
],
"title": "Request to finetune a model."
},
"LoraFinetuningConfig": {
"QLoraFinetuningConfig": {
"type": "object",
"properties": {
"lora_attn_modules": {
@ -2926,71 +3140,6 @@
"rank",
"alpha"
]
},
"OptimizerConfig": {
"type": "object",
"properties": {
"optimizer_type": {
"type": "string",
"enum": [
"adam",
"adamw",
"sgd"
]
},
"lr": {
"type": "number"
},
"lr_min": {
"type": "number"
},
"weight_decay": {
"type": "number"
}
},
"additionalProperties": false,
"required": [
"optimizer_type",
"lr",
"lr_min",
"weight_decay"
]
},
"TrainingConfig": {
"type": "object",
"properties": {
"n_epochs": {
"type": "integer"
},
"batch_size": {
"type": "integer"
},
"shuffle": {
"type": "boolean"
},
"n_iters": {
"type": "integer"
},
"enable_activation_checkpointing": {
"type": "boolean"
},
"memory_efficient_fsdp_wrap": {
"type": "boolean"
},
"fsdp_cpu_offload": {
"type": "boolean"
}
},
"additionalProperties": false,
"required": [
"n_epochs",
"batch_size",
"shuffle",
"n_iters",
"enable_activation_checkpointing",
"memory_efficient_fsdp_wrap",
"fsdp_cpu_offload"
]
}
},
"responses": {}
@ -3001,27 +3150,27 @@
}
],
"tags": [
{
"name": "RewardScoring"
},
{
"name": "MemoryBanks"
},
{
"name": "SyntheticDataGeneration"
},
{
"name": "Finetuning"
},
{
"name": "AgenticSystem"
},
{
"name": "RewardScoring"
},
{
"name": "Inference"
},
{
"name": "SyntheticDataGeneration"
},
{
"name": "Datasets"
},
{
"name": "PostTraining"
},
{
"name": "MemoryBanks"
},
{
"name": "ShieldConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/ShieldConfig\" />"
@ -3075,16 +3224,16 @@
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryBank\" />"
},
{
"name": "FinetuningJobArtifactsResponse",
"description": "Artifacts of a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningJobArtifactsResponse\" />"
"name": "PostTrainingJobArtifactsResponse",
"description": "Artifacts of a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingJobArtifactsResponse\" />"
},
{
"name": "FinetuningJobStatusResponse",
"description": "Status of a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningJobStatusResponse\" />"
"name": "PostTrainingJobStatusResponse",
"description": "Status of a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingJobStatusResponse\" />"
},
{
"name": "FinetuningJobLogStream",
"description": "Stream of logs from a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningJobLogStream\" />"
"name": "PostTrainingJobLogStream",
"description": "Stream of logs from a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingJobLogStream\" />"
},
{
"name": "BatchChatCompletionRequest",
@ -3138,6 +3287,22 @@
"name": "SyntheticDataGenerationResponse",
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/SyntheticDataGenerationResponse\" />"
},
{
"name": "DPOAlignmentConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/DPOAlignmentConfig\" />"
},
{
"name": "OptimizerConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/OptimizerConfig\" />"
},
{
"name": "PostTrainingRLHFRequest",
"description": "Request to finetune a model.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingRLHFRequest\" />"
},
{
"name": "TrainingConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/TrainingConfig\" />"
},
{
"name": "RewardScoringRequest",
"description": "Request to score a reward function. A list of prompts and a list of responses per prompt.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringRequest\" />"
@ -3147,20 +3312,20 @@
"description": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringResponse\" />"
},
{
"name": "FinetuningTrainRequest",
"description": "Request to finetune a model.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningTrainRequest\" />"
"name": "DoraFinetuningConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/DoraFinetuningConfig\" />"
},
{
"name": "LoraFinetuningConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/LoraFinetuningConfig\" />"
},
{
"name": "OptimizerConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/OptimizerConfig\" />"
"name": "PostTrainingSFTRequest",
"description": "Request to finetune a model.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingSFTRequest\" />"
},
{
"name": "TrainingConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/TrainingConfig\" />"
"name": "QLoraFinetuningConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/QLoraFinetuningConfig\" />"
}
],
"x-tagGroups": [
@ -3169,9 +3334,9 @@
"tags": [
"AgenticSystem",
"Datasets",
"Finetuning",
"Inference",
"MemoryBanks",
"PostTraining",
"RewardScoring",
"SyntheticDataGeneration"
]
@ -3195,18 +3360,22 @@
"CompletionResponse",
"CompletionResponseStreamChunk",
"CreateDatasetRequest",
"DPOAlignmentConfig",
"Dataset",
"Dialog",
"FinetuningJobArtifactsResponse",
"FinetuningJobLogStream",
"FinetuningJobStatusResponse",
"FinetuningTrainRequest",
"DoraFinetuningConfig",
"KScoredPromptGenerations",
"LoraFinetuningConfig",
"MemoryBank",
"Message",
"MessageScore",
"OptimizerConfig",
"PostTrainingJobArtifactsResponse",
"PostTrainingJobLogStream",
"PostTrainingJobStatusResponse",
"PostTrainingRLHFRequest",
"PostTrainingSFTRequest",
"QLoraFinetuningConfig",
"RewardScoringRequest",
"RewardScoringResponse",
"ShieldConfig",

View file

@ -879,6 +879,23 @@ components:
- dataset
title: Request to create a dataset.
type: object
DPOAlignmentConfig:
additionalProperties: false
properties:
epsilon:
type: number
gamma:
type: number
reward_clip:
type: number
reward_scale:
type: number
required:
- reward_scale
- reward_clip
- epsilon
- gamma
type: object
Dataset:
additionalProperties: false
properties:
@ -923,107 +940,8 @@ components:
- message
- message_history
type: object
FinetuningJobArtifactsResponse:
DoraFinetuningConfig:
additionalProperties: false
properties:
checkpoints:
items:
additionalProperties: false
properties:
iters:
type: integer
path:
$ref: '#/components/schemas/URL'
required:
- iters
- path
type: object
type: array
job_uuid:
type: string
required:
- job_uuid
- checkpoints
title: Artifacts of a finetuning job.
type: object
FinetuningJobLogStream:
additionalProperties: false
properties:
job_uuid:
type: string
log_lines:
items:
type: string
type: array
required:
- job_uuid
- log_lines
title: Stream of logs from a finetuning job.
type: object
FinetuningJobStatusResponse:
additionalProperties: false
properties:
checkpoints:
items:
additionalProperties: false
properties:
iters:
type: integer
path:
$ref: '#/components/schemas/URL'
required:
- iters
- path
type: object
type: array
completed_at:
format: date-time
type: string
job_uuid:
type: string
resources_allocated:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
scheduled_at:
format: date-time
type: string
started_at:
format: date-time
type: string
status:
enum:
- running
- completed
- failed
- scheduled
type: string
required:
- job_uuid
- status
- checkpoints
title: Status of a finetuning job.
type: object
FinetuningTrainRequest:
additionalProperties: false
properties:
algorithm:
enum:
- full
- lora
- qlora
- dora
type: string
algorithm_config:
oneOf:
- $ref: '#/components/schemas/LoraFinetuningConfig'
- additionalProperties: false
properties:
alpha:
type: integer
@ -1044,75 +962,6 @@ components:
- rank
- alpha
type: object
- additionalProperties: false
properties:
alpha:
type: integer
apply_lora_to_mlp:
type: boolean
apply_lora_to_output:
type: boolean
lora_attn_modules:
items:
type: string
type: array
rank:
type: integer
required:
- lora_attn_modules
- apply_lora_to_mlp
- apply_lora_to_output
- rank
- alpha
type: object
dataset:
$ref: '#/components/schemas/Dataset'
hyperparam_search_config:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
job_uuid:
type: string
logger_config:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
model:
enum:
- llama3_8b
- llama3_70b
type: string
optimizer_config:
$ref: '#/components/schemas/OptimizerConfig'
training_config:
$ref: '#/components/schemas/TrainingConfig'
validation_dataset:
$ref: '#/components/schemas/Dataset'
required:
- job_uuid
- model
- dataset
- validation_dataset
- algorithm
- algorithm_config
- optimizer_config
- training_config
- hyperparam_search_config
- logger_config
title: Request to finetune a model.
type: object
KScoredPromptGenerations:
additionalProperties: false
properties:
@ -1259,6 +1108,232 @@ components:
- lr_min
- weight_decay
type: object
PostTrainingJobArtifactsResponse:
additionalProperties: false
properties:
checkpoints:
items:
additionalProperties: false
properties:
iters:
type: integer
path:
$ref: '#/components/schemas/URL'
required:
- iters
- path
type: object
type: array
job_uuid:
type: string
required:
- job_uuid
- checkpoints
title: Artifacts of a finetuning job.
type: object
PostTrainingJobLogStream:
additionalProperties: false
properties:
job_uuid:
type: string
log_lines:
items:
type: string
type: array
required:
- job_uuid
- log_lines
title: Stream of logs from a finetuning job.
type: object
PostTrainingJobStatusResponse:
additionalProperties: false
properties:
checkpoints:
items:
additionalProperties: false
properties:
iters:
type: integer
path:
$ref: '#/components/schemas/URL'
required:
- iters
- path
type: object
type: array
completed_at:
format: date-time
type: string
job_uuid:
type: string
resources_allocated:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
scheduled_at:
format: date-time
type: string
started_at:
format: date-time
type: string
status:
enum:
- running
- completed
- failed
- scheduled
type: string
required:
- job_uuid
- status
- checkpoints
title: Status of a finetuning job.
type: object
PostTrainingRLHFRequest:
additionalProperties: false
properties:
algorithm:
enum:
- dpo
type: string
algorithm_config:
$ref: '#/components/schemas/DPOAlignmentConfig'
dataset:
$ref: '#/components/schemas/Dataset'
finetuned_model:
$ref: '#/components/schemas/URL'
hyperparam_search_config:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
job_uuid:
type: string
logger_config:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
optimizer_config:
$ref: '#/components/schemas/OptimizerConfig'
training_config:
$ref: '#/components/schemas/TrainingConfig'
validation_dataset:
$ref: '#/components/schemas/Dataset'
required:
- job_uuid
- finetuned_model
- dataset
- validation_dataset
- algorithm
- algorithm_config
- optimizer_config
- training_config
- hyperparam_search_config
- logger_config
title: Request to finetune a model.
type: object
PostTrainingSFTRequest:
additionalProperties: false
properties:
algorithm:
enum:
- full
- lora
- qlora
- dora
type: string
algorithm_config:
oneOf:
- $ref: '#/components/schemas/LoraFinetuningConfig'
- $ref: '#/components/schemas/QLoraFinetuningConfig'
- $ref: '#/components/schemas/DoraFinetuningConfig'
dataset:
$ref: '#/components/schemas/Dataset'
hyperparam_search_config:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
job_uuid:
type: string
logger_config:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
model:
enum:
- llama3_8b
- llama3_70b
type: string
optimizer_config:
$ref: '#/components/schemas/OptimizerConfig'
training_config:
$ref: '#/components/schemas/TrainingConfig'
validation_dataset:
$ref: '#/components/schemas/Dataset'
required:
- job_uuid
- model
- dataset
- validation_dataset
- algorithm
- algorithm_config
- optimizer_config
- training_config
- hyperparam_search_config
- logger_config
title: Request to finetune a model.
type: object
QLoraFinetuningConfig:
additionalProperties: false
properties:
alpha:
type: integer
apply_lora_to_mlp:
type: boolean
apply_lora_to_output:
type: boolean
lora_attn_modules:
items:
type: string
type: array
rank:
type: integer
required:
- lora_attn_modules
- apply_lora_to_mlp
- apply_lora_to_output
- rank
- alpha
type: object
RewardScoringRequest:
additionalProperties: false
properties:
@ -1581,71 +1656,6 @@ paths:
description: OK
tags:
- Datasets
/finetuning/job/artifacts:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/FinetuningJobArtifactsResponse'
description: OK
tags:
- Finetuning
/finetuning/job/logs:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/FinetuningJobLogStream'
description: OK
tags:
- Finetuning
/finetuning/job/status:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/FinetuningJobStatusResponse'
description: OK
tags:
- Finetuning
/finetuning/text_generation/train:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/FinetuningTrainRequest'
required: true
responses:
'200':
description: OK
tags:
- Finetuning
/memory_banks/create:
post:
parameters:
@ -1787,6 +1797,85 @@ paths:
description: OK
tags:
- MemoryBanks
/post_training/job/artifacts:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
description: OK
tags:
- PostTraining
/post_training/job/logs:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/PostTrainingJobLogStream'
description: OK
tags:
- PostTraining
/post_training/job/status:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/PostTrainingJobStatusResponse'
description: OK
tags:
- PostTraining
/post_training/preference_optimize/:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/PostTrainingRLHFRequest'
required: true
responses:
'200':
description: OK
tags:
- PostTraining
/post_training/supervised_fine_tune/:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/PostTrainingSFTRequest'
required: true
responses:
'200':
description: OK
tags:
- PostTraining
/reward_scoring/score:
post:
parameters: []
@ -1828,13 +1917,13 @@ security:
servers:
- url: http://llama.meta.com
tags:
- name: RewardScoring
- name: MemoryBanks
- name: SyntheticDataGeneration
- name: Finetuning
- name: AgenticSystem
- name: RewardScoring
- name: Inference
- name: SyntheticDataGeneration
- name: Datasets
- name: PostTraining
- name: MemoryBanks
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
name: ShieldConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
@ -1888,20 +1977,20 @@ tags:
- description: 'Artifacts of a finetuning job.
<SchemaDefinition schemaRef="#/components/schemas/FinetuningJobArtifactsResponse"
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingJobArtifactsResponse"
/>'
name: FinetuningJobArtifactsResponse
name: PostTrainingJobArtifactsResponse
- description: 'Status of a finetuning job.
<SchemaDefinition schemaRef="#/components/schemas/FinetuningJobStatusResponse"
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingJobStatusResponse"
/>'
name: FinetuningJobStatusResponse
name: PostTrainingJobStatusResponse
- description: 'Stream of logs from a finetuning job.
<SchemaDefinition schemaRef="#/components/schemas/FinetuningJobLogStream" />'
name: FinetuningJobLogStream
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingJobLogStream" />'
name: PostTrainingJobLogStream
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
/>
name: BatchChatCompletionRequest
@ -1961,6 +2050,19 @@ tags:
<SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationResponse"
/>'
name: SyntheticDataGenerationResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/DPOAlignmentConfig"
/>
name: DPOAlignmentConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/OptimizerConfig"
/>
name: OptimizerConfig
- description: 'Request to finetune a model.
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingRLHFRequest" />'
name: PostTrainingRLHFRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/TrainingConfig" />
name: TrainingConfig
- description: 'Request to score a reward function. A list of prompts and a list of
responses per prompt.
@ -1973,27 +2075,28 @@ tags:
<SchemaDefinition schemaRef="#/components/schemas/RewardScoringResponse" />'
name: RewardScoringResponse
- description: 'Request to finetune a model.
<SchemaDefinition schemaRef="#/components/schemas/FinetuningTrainRequest" />'
name: FinetuningTrainRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/DoraFinetuningConfig"
/>
name: DoraFinetuningConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/LoraFinetuningConfig"
/>
name: LoraFinetuningConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/OptimizerConfig"
- description: 'Request to finetune a model.
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingSFTRequest" />'
name: PostTrainingSFTRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/QLoraFinetuningConfig"
/>
name: OptimizerConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/TrainingConfig" />
name: TrainingConfig
name: QLoraFinetuningConfig
x-tagGroups:
- name: Operations
tags:
- AgenticSystem
- Datasets
- Finetuning
- Inference
- MemoryBanks
- PostTraining
- RewardScoring
- SyntheticDataGeneration
- name: Types
@ -2014,18 +2117,22 @@ x-tagGroups:
- CompletionResponse
- CompletionResponseStreamChunk
- CreateDatasetRequest
- DPOAlignmentConfig
- Dataset
- Dialog
- FinetuningJobArtifactsResponse
- FinetuningJobLogStream
- FinetuningJobStatusResponse
- FinetuningTrainRequest
- DoraFinetuningConfig
- KScoredPromptGenerations
- LoraFinetuningConfig
- MemoryBank
- Message
- MessageScore
- OptimizerConfig
- PostTrainingJobArtifactsResponse
- PostTrainingJobLogStream
- PostTrainingJobStatusResponse
- PostTrainingRLHFRequest
- PostTrainingSFTRequest
- QLoraFinetuningConfig
- RewardScoringRequest
- RewardScoringResponse
- ShieldConfig

View file

@ -72,11 +72,13 @@ class LoraFinetuningConfig:
alpha: int
# QLoRA flavour of the LoRA config. It declares no fields of its own, so its
# shape is exactly what LoraFinetuningConfig defines; it exists as a separate
# class so the variant can be referred to by its own name.
# NOTE(review): @json_schema_type presumably registers it as a named schema
# component instead of an inlined object -- confirm with strong_typing.
@json_schema_type
@dataclass
class QLoraFinetuningConfig(LoraFinetuningConfig):
    pass
# DoRA flavour of the LoRA config. It declares no fields of its own, so its
# shape is exactly what LoraFinetuningConfig defines; it exists as a separate
# class so the variant can be referred to by its own name.
# NOTE(review): @json_schema_type presumably registers it as a named schema
# component instead of an inlined object -- confirm with strong_typing.
@json_schema_type
@dataclass
class DoraFinetuningConfig(LoraFinetuningConfig):
    pass
@ -84,14 +86,14 @@ class DoraFinetuningConfig(LoraFinetuningConfig):
@json_schema_type
@dataclass
class FinetuningJobLogStream:
class PostTrainingJobLogStream:
"""Stream of logs from a finetuning job."""
job_uuid: str
log_lines: List[str]
class FinetuningJobStatus(Enum):
class PostTrainingJobStatus(Enum):
running = "running"
completed = "completed"
failed = "failed"
@ -102,3 +104,16 @@ class FinetuningJobStatus(Enum):
class Checkpoint:
iters: int
path: URL
# Closed set of algorithm names for RLHF-style post-training requests.
# "dpo" is the only member defined so far; each member's value is the string
# that appears on the wire.
class RLHFAlgorithm(Enum):
    dpo = "dpo"
# Algorithm-specific settings for the "dpo" alignment algorithm. All four
# values are floats and all are required (no defaults are declared).
# NOTE(review): the names suggest reward scaling/clipping plus DPO loss
# hyperparameters, but their exact meaning and valid ranges are not defined
# here -- confirm against the trainer that consumes this config.
@json_schema_type
@dataclass
class DPOAlignmentConfig:
    reward_scale: float
    reward_clip: float
    epsilon: float
    gamma: float