From a9d8fdef906ff54a073d28e732c35b6950c8eba1 Mon Sep 17 00:00:00 2001
From: Sai Soundararaj <s.saiprashanth@gmail.com>
Date: Tue, 1 Jul 2025 16:46:20 -0700
Subject: [PATCH] docs: add docstrings and OpenAPI descriptions for post_training API types

---
 api_update_plan.md                            |   4 +-
 docs/_static/llama-stack-spec.html            | 167 ++++++++++++------
 docs/_static/llama-stack-spec.yaml            | 100 ++++++++++-
 .../apis/post_training/post_training.py       | 111 +++++++++++-
 4 files changed, 319 insertions(+), 63 deletions(-)

diff --git a/api_update_plan.md b/api_update_plan.md
index b59745fbc..ffda31b00 100644
--- a/api_update_plan.md
+++ b/api_update_plan.md
@@ -229,8 +229,8 @@ Before finalizing documentation, verify:
 [x] 8. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/vector_dbs/vector_dbs.py` - Vector database management
 [x] 9. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/files/files.py` - File management
 [x] 10. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/datasets/datasets.py` - Dataset management
-11. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/datasetio/datasetio.py` - Dataset I/O operations
-12. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/post_training/post_training.py` - Training and fine-tuning
+[x] 11. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/datasetio/datasetio.py` - Dataset I/O operations
+[x] 12. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/post_training/post_training.py` - Training and fine-tuning
 13. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/eval/eval.py` - Evaluation framework
 14. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring/scoring.py` - Scoring system
 15. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring_functions/scoring_functions.py` - Scoring function definitions
diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 4cebaec61..aa3fe644b 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -11252,13 +11252,15 @@
                 "type": "object",
                 "properties": {
                     "job_uuid": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "Unique identifier for the training job"
                     },
                     "checkpoints": {
                         "type": "array",
                         "items": {
                             "$ref": "#/components/schemas/Checkpoint"
-                        }
+                        },
+                        "description": "List of model checkpoints created during training"
                     }
                 },
                 "additionalProperties": false,
@@ -11273,7 +11275,8 @@
                 "type": "object",
                 "properties": {
                     "job_uuid": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "Unique identifier for the training job"
                     },
                     "status": {
                         "type": "string",
@@ -11284,19 +11287,22 @@
                             "scheduled",
                             "cancelled"
                         ],
-                        "title": "JobStatus"
+                        "description": "Current status of the training job"
                     },
                     "scheduled_at": {
                         "type": "string",
-                        "format": "date-time"
+                        "format": "date-time",
+                        "description": "(Optional) Timestamp when the job was scheduled"
                     },
                     "started_at": {
                         "type": "string",
-                        "format": "date-time"
+                        "format": "date-time",
+                        "description": "(Optional) Timestamp when the job execution began"
                     },
                     "completed_at": {
                         "type": "string",
-                        "format": "date-time"
+                        "format": "date-time",
+                        "description": "(Optional) Timestamp when the job finished, if completed"
                     },
                     "resources_allocated": {
                         "type": "object",
@@ -11321,13 +11327,15 @@
                                     "type": "object"
                                 }
                             ]
-                        }
+                        },
+                        "description": "(Optional) Information about computational resources allocated to the job"
                     },
                     "checkpoints": {
                         "type": "array",
                         "items": {
                             "$ref": "#/components/schemas/Checkpoint"
-                        }
+                        },
+                        "description": "List of model checkpoints created during training"
                     }
                 },
                 "additionalProperties": false,
@@ -14644,16 +14652,20 @@
                 "type": "object",
                 "properties": {
                     "reward_scale": {
-                        "type": "number"
+                        "type": "number",
+                        "description": "Scaling factor for the reward signal"
                     },
                     "reward_clip": {
-                        "type": "number"
+                        "type": "number",
+                        "description": "Maximum absolute value for reward clipping"
                     },
                     "epsilon": {
-                        "type": "number"
+                        "type": "number",
+                        "description": "Small value added for numerical stability"
                     },
                     "gamma": {
-                        "type": "number"
+                        "type": "number",
+                        "description": "Discount factor for future rewards"
                     }
                 },
                 "additionalProperties": false,
@@ -14663,33 +14675,41 @@
                     "epsilon",
                     "gamma"
                 ],
-                "title": "DPOAlignmentConfig"
+                "title": "DPOAlignmentConfig",
+                "description": "Configuration for Direct Preference Optimization (DPO) alignment."
             },
             "DataConfig": {
                 "type": "object",
                 "properties": {
                     "dataset_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "Unique identifier for the training dataset"
                     },
                     "batch_size": {
-                        "type": "integer"
+                        "type": "integer",
+                        "description": "Number of samples per training batch"
                     },
                     "shuffle": {
-                        "type": "boolean"
+                        "type": "boolean",
+                        "description": "Whether to shuffle the dataset during training"
                     },
                     "data_format": {
-                        "$ref": "#/components/schemas/DatasetFormat"
+                        "$ref": "#/components/schemas/DatasetFormat",
+                        "description": "Format of the dataset (instruct or dialog)"
                     },
                     "validation_dataset_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "(Optional) Unique identifier for the validation dataset"
                     },
                     "packed": {
                         "type": "boolean",
-                        "default": false
+                        "default": false,
+                        "description": "(Optional) Whether to pack multiple samples into a single sequence for efficiency"
                     },
                     "train_on_input": {
                         "type": "boolean",
-                        "default": false
+                        "default": false,
+                        "description": "(Optional) Whether to compute loss on input tokens as well as output tokens"
                     }
                 },
                 "additionalProperties": false,
@@ -14699,7 +14719,8 @@
                     "shuffle",
                     "data_format"
                 ],
-                "title": "DataConfig"
+                "title": "DataConfig",
+                "description": "Configuration for training data and data loading."
             },
             "DatasetFormat": {
                 "type": "string",
@@ -14707,45 +14728,55 @@
                     "instruct",
                     "dialog"
                 ],
-                "title": "DatasetFormat"
+                "title": "DatasetFormat",
+                "description": "Format of the training dataset."
             },
             "EfficiencyConfig": {
                 "type": "object",
                 "properties": {
                     "enable_activation_checkpointing": {
                         "type": "boolean",
-                        "default": false
+                        "default": false,
+                        "description": "(Optional) Whether to use activation checkpointing to reduce memory usage"
                     },
                     "enable_activation_offloading": {
                         "type": "boolean",
-                        "default": false
+                        "default": false,
+                        "description": "(Optional) Whether to offload activations to CPU to save GPU memory"
                     },
                     "memory_efficient_fsdp_wrap": {
                         "type": "boolean",
-                        "default": false
+                        "default": false,
+                        "description": "(Optional) Whether to use memory-efficient FSDP wrapping"
                     },
                     "fsdp_cpu_offload": {
                         "type": "boolean",
-                        "default": false
+                        "default": false,
+                        "description": "(Optional) Whether to offload FSDP parameters to CPU"
                     }
                 },
                 "additionalProperties": false,
-                "title": "EfficiencyConfig"
+                "title": "EfficiencyConfig",
+                "description": "Configuration for memory and compute efficiency optimizations."
             },
             "OptimizerConfig": {
                 "type": "object",
                 "properties": {
                     "optimizer_type": {
-                        "$ref": "#/components/schemas/OptimizerType"
+                        "$ref": "#/components/schemas/OptimizerType",
+                        "description": "Type of optimizer to use (adam, adamw, or sgd)"
                     },
                     "lr": {
-                        "type": "number"
+                        "type": "number",
+                        "description": "Learning rate for the optimizer"
                     },
                     "weight_decay": {
-                        "type": "number"
+                        "type": "number",
+                        "description": "Weight decay coefficient for regularization"
                     },
                     "num_warmup_steps": {
-                        "type": "integer"
+                        "type": "integer",
+                        "description": "Number of steps for learning rate warmup"
                     }
                 },
                 "additionalProperties": false,
@@ -14755,7 +14786,8 @@
                     "weight_decay",
                     "num_warmup_steps"
                 ],
-                "title": "OptimizerConfig"
+                "title": "OptimizerConfig",
+                "description": "Configuration parameters for the optimization algorithm."
             },
             "OptimizerType": {
                 "type": "string",
@@ -14764,38 +14796,47 @@
                     "adamw",
                     "sgd"
                 ],
-                "title": "OptimizerType"
+                "title": "OptimizerType",
+                "description": "Available optimizer algorithms for training."
             },
             "TrainingConfig": {
                 "type": "object",
                 "properties": {
                     "n_epochs": {
-                        "type": "integer"
+                        "type": "integer",
+                        "description": "Number of training epochs to run"
                     },
                     "max_steps_per_epoch": {
                         "type": "integer",
-                        "default": 1
+                        "default": 1,
+                        "description": "Maximum number of steps to run per epoch"
                     },
                     "gradient_accumulation_steps": {
                         "type": "integer",
-                        "default": 1
+                        "default": 1,
+                        "description": "Number of steps to accumulate gradients before updating"
                     },
                     "max_validation_steps": {
                         "type": "integer",
-                        "default": 1
+                        "default": 1,
+                        "description": "(Optional) Maximum number of validation steps per epoch"
                     },
                     "data_config": {
-                        "$ref": "#/components/schemas/DataConfig"
+                        "$ref": "#/components/schemas/DataConfig",
+                        "description": "(Optional) Configuration for data loading and formatting"
                     },
                     "optimizer_config": {
-                        "$ref": "#/components/schemas/OptimizerConfig"
+                        "$ref": "#/components/schemas/OptimizerConfig",
+                        "description": "(Optional) Configuration for the optimization algorithm"
                     },
                     "efficiency_config": {
-                        "$ref": "#/components/schemas/EfficiencyConfig"
+                        "$ref": "#/components/schemas/EfficiencyConfig",
+                        "description": "(Optional) Configuration for memory and compute optimizations"
                     },
                     "dtype": {
                         "type": "string",
-                        "default": "bf16"
+                        "default": "bf16",
+                        "description": "(Optional) Data type for model parameters (bf16, fp16, fp32)"
                     }
                 },
                 "additionalProperties": false,
@@ -14804,7 +14845,8 @@
                     "max_steps_per_epoch",
                     "gradient_accumulation_steps"
                 ],
-                "title": "TrainingConfig"
+                "title": "TrainingConfig",
+                "description": "Comprehensive configuration for the training process."
             },
             "PreferenceOptimizeRequest": {
                 "type": "object",
@@ -16101,33 +16143,41 @@
                     "type": {
                         "type": "string",
                         "const": "LoRA",
-                        "default": "LoRA"
+                        "default": "LoRA",
+                        "description": "Algorithm type identifier, always \"LoRA\""
                     },
                     "lora_attn_modules": {
                         "type": "array",
                         "items": {
                             "type": "string"
-                        }
+                        },
+                        "description": "List of attention module names to apply LoRA to"
                     },
                     "apply_lora_to_mlp": {
-                        "type": "boolean"
+                        "type": "boolean",
+                        "description": "Whether to apply LoRA to MLP layers"
                     },
                     "apply_lora_to_output": {
-                        "type": "boolean"
+                        "type": "boolean",
+                        "description": "Whether to apply LoRA to output projection layers"
                     },
                     "rank": {
-                        "type": "integer"
+                        "type": "integer",
+                        "description": "Rank of the LoRA adaptation (lower rank = fewer parameters)"
                     },
                     "alpha": {
-                        "type": "integer"
+                        "type": "integer",
+                        "description": "LoRA scaling parameter that controls adaptation strength"
                     },
                     "use_dora": {
                         "type": "boolean",
-                        "default": false
+                        "default": false,
+                        "description": "(Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)"
                     },
                     "quantize_base": {
                         "type": "boolean",
-                        "default": false
+                        "default": false,
+                        "description": "(Optional) Whether to quantize the base model weights"
                     }
                 },
                 "additionalProperties": false,
@@ -16139,7 +16189,8 @@
                     "rank",
                     "alpha"
                 ],
-                "title": "LoraFinetuningConfig"
+                "title": "LoraFinetuningConfig",
+                "description": "Configuration for Low-Rank Adaptation (LoRA) fine-tuning."
             },
             "QATFinetuningConfig": {
                 "type": "object",
@@ -16147,13 +16198,16 @@
                     "type": {
                         "type": "string",
                         "const": "QAT",
-                        "default": "QAT"
+                        "default": "QAT",
+                        "description": "Algorithm type identifier, always \"QAT\""
                     },
                     "quantizer_name": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "Name of the quantization algorithm to use"
                     },
                     "group_size": {
-                        "type": "integer"
+                        "type": "integer",
+                        "description": "Size of groups for grouped quantization"
                     }
                 },
                 "additionalProperties": false,
@@ -16162,7 +16216,8 @@
                     "quantizer_name",
                     "group_size"
                 ],
-                "title": "QATFinetuningConfig"
+                "title": "QATFinetuningConfig",
+                "description": "Configuration for Quantization-Aware Training (QAT) fine-tuning."
             },
             "SupervisedFineTuneRequest": {
                 "type": "object",
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index d154c6d76..ce7a7293f 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -8064,10 +8064,13 @@ components:
       properties:
         job_uuid:
           type: string
+          description: Unique identifier for the training job
         checkpoints:
           type: array
           items:
             $ref: '#/components/schemas/Checkpoint'
+          description: >-
+            List of model checkpoints created during training
       additionalProperties: false
       required:
         - job_uuid
@@ -8079,6 +8082,7 @@ components:
       properties:
         job_uuid:
           type: string
+          description: Unique identifier for the training job
         status:
           type: string
           enum:
@@ -8087,16 +8091,22 @@ components:
             - failed
             - scheduled
             - cancelled
-          title: JobStatus
+          description: Current status of the training job
         scheduled_at:
           type: string
           format: date-time
+          description: >-
+            (Optional) Timestamp when the job was scheduled
         started_at:
           type: string
           format: date-time
+          description: >-
+            (Optional) Timestamp when the job execution began
         completed_at:
           type: string
           format: date-time
+          description: >-
+            (Optional) Timestamp when the job finished, if completed
         resources_allocated:
           type: object
           additionalProperties:
@@ -8107,10 +8117,15 @@ components:
               - type: string
               - type: array
               - type: object
+          description: >-
+            (Optional) Information about computational resources allocated to the
+            job
         checkpoints:
           type: array
           items:
             $ref: '#/components/schemas/Checkpoint'
+          description: >-
+            List of model checkpoints created during training
       additionalProperties: false
       required:
         - job_uuid
@@ -10491,12 +10506,18 @@ components:
       properties:
         reward_scale:
           type: number
+          description: Scaling factor for the reward signal
         reward_clip:
           type: number
+          description: >-
+            Maximum absolute value for reward clipping
         epsilon:
           type: number
+          description: >-
+            Small value added for numerical stability
         gamma:
           type: number
+          description: Discount factor for future rewards
       additionalProperties: false
       required:
         - reward_scale
@@ -10504,25 +10525,41 @@ components:
         - epsilon
         - gamma
       title: DPOAlignmentConfig
+      description: >-
+        Configuration for Direct Preference Optimization (DPO) alignment.
     DataConfig:
       type: object
       properties:
         dataset_id:
           type: string
+          description: >-
+            Unique identifier for the training dataset
         batch_size:
           type: integer
+          description: Number of samples per training batch
         shuffle:
           type: boolean
+          description: >-
+            Whether to shuffle the dataset during training
         data_format:
           $ref: '#/components/schemas/DatasetFormat'
+          description: >-
+            Format of the dataset (instruct or dialog)
         validation_dataset_id:
           type: string
+          description: >-
+            (Optional) Unique identifier for the validation dataset
         packed:
           type: boolean
           default: false
+          description: >-
+            (Optional) Whether to pack multiple samples into a single sequence for
+            efficiency
         train_on_input:
           type: boolean
           default: false
+          description: >-
+            (Optional) Whether to compute loss on input tokens as well as output tokens
       additionalProperties: false
       required:
         - dataset_id
@@ -10530,40 +10567,59 @@ components:
         - shuffle
         - data_format
       title: DataConfig
+      description: >-
+        Configuration for training data and data loading.
     DatasetFormat:
       type: string
       enum:
         - instruct
         - dialog
       title: DatasetFormat
+      description: Format of the training dataset.
     EfficiencyConfig:
       type: object
       properties:
         enable_activation_checkpointing:
           type: boolean
           default: false
+          description: >-
+            (Optional) Whether to use activation checkpointing to reduce memory usage
         enable_activation_offloading:
           type: boolean
           default: false
+          description: >-
+            (Optional) Whether to offload activations to CPU to save GPU memory
         memory_efficient_fsdp_wrap:
           type: boolean
           default: false
+          description: >-
+            (Optional) Whether to use memory-efficient FSDP wrapping
         fsdp_cpu_offload:
           type: boolean
           default: false
+          description: >-
+            (Optional) Whether to offload FSDP parameters to CPU
       additionalProperties: false
       title: EfficiencyConfig
+      description: >-
+        Configuration for memory and compute efficiency optimizations.
     OptimizerConfig:
       type: object
       properties:
         optimizer_type:
           $ref: '#/components/schemas/OptimizerType'
+          description: >-
+            Type of optimizer to use (adam, adamw, or sgd)
         lr:
           type: number
+          description: Learning rate for the optimizer
         weight_decay:
           type: number
+          description: >-
+            Weight decay coefficient for regularization
         num_warmup_steps:
           type: integer
+          description: Number of steps for learning rate warmup
       additionalProperties: false
       required:
         - optimizer_type
@@ -10571,6 +10627,8 @@ components:
         - weight_decay
         - num_warmup_steps
       title: OptimizerConfig
+      description: >-
+        Configuration parameters for the optimization algorithm.
     OptimizerType:
       type: string
       enum:
@@ -10578,35 +10636,53 @@ components:
         - adamw
         - sgd
       title: OptimizerType
+      description: >-
+        Available optimizer algorithms for training.
     TrainingConfig:
       type: object
       properties:
         n_epochs:
           type: integer
+          description: Number of training epochs to run
         max_steps_per_epoch:
           type: integer
           default: 1
+          description: Maximum number of steps to run per epoch
         gradient_accumulation_steps:
           type: integer
           default: 1
+          description: >-
+            Number of steps to accumulate gradients before updating
         max_validation_steps:
           type: integer
           default: 1
+          description: >-
+            (Optional) Maximum number of validation steps per epoch
         data_config:
           $ref: '#/components/schemas/DataConfig'
+          description: >-
+            (Optional) Configuration for data loading and formatting
         optimizer_config:
           $ref: '#/components/schemas/OptimizerConfig'
+          description: >-
+            (Optional) Configuration for the optimization algorithm
         efficiency_config:
           $ref: '#/components/schemas/EfficiencyConfig'
+          description: >-
+            (Optional) Configuration for memory and compute optimizations
         dtype:
           type: string
           default: bf16
+          description: >-
+            (Optional) Data type for model parameters (bf16, fp16, fp32)
       additionalProperties: false
       required:
         - n_epochs
         - max_steps_per_epoch
         - gradient_accumulation_steps
       title: TrainingConfig
+      description: >-
+        Comprehensive configuration for the training process.
     PreferenceOptimizeRequest:
       type: object
       properties:
@@ -11535,24 +11611,38 @@ components:
           type: string
           const: LoRA
           default: LoRA
+          description: Algorithm type identifier, always "LoRA"
         lora_attn_modules:
           type: array
           items:
             type: string
+          description: >-
+            List of attention module names to apply LoRA to
         apply_lora_to_mlp:
           type: boolean
+          description: Whether to apply LoRA to MLP layers
         apply_lora_to_output:
           type: boolean
+          description: >-
+            Whether to apply LoRA to output projection layers
         rank:
           type: integer
+          description: >-
+            Rank of the LoRA adaptation (lower rank = fewer parameters)
         alpha:
           type: integer
+          description: >-
+            LoRA scaling parameter that controls adaptation strength
         use_dora:
           type: boolean
           default: false
+          description: >-
+            (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)
         quantize_base:
           type: boolean
           default: false
+          description: >-
+            (Optional) Whether to quantize the base model weights
       additionalProperties: false
       required:
         - type
@@ -11562,6 +11652,8 @@ components:
         - rank
         - alpha
       title: LoraFinetuningConfig
+      description: >-
+        Configuration for Low-Rank Adaptation (LoRA) fine-tuning.
     QATFinetuningConfig:
       type: object
       properties:
@@ -11569,16 +11661,22 @@ components:
           type: string
           const: QAT
           default: QAT
+          description: Algorithm type identifier, always "QAT"
         quantizer_name:
           type: string
+          description: >-
+            Name of the quantization algorithm to use
         group_size:
           type: integer
+          description: Size of groups for grouped quantization
       additionalProperties: false
       required:
         - type
         - quantizer_name
         - group_size
       title: QATFinetuningConfig
+      description: >-
+        Configuration for Quantization-Aware Training (QAT) fine-tuning.
     SupervisedFineTuneRequest:
       type: object
       properties:
diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py
index b196c8a17..2a41428b8 100644
--- a/llama_stack/apis/post_training/post_training.py
+++ b/llama_stack/apis/post_training/post_training.py
@@ -18,6 +18,12 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
 
 @json_schema_type
 class OptimizerType(Enum):
+    """Available optimizer algorithms for training.
+
+    :cvar adam: Adaptive Moment Estimation optimizer
+    :cvar adamw: AdamW optimizer with weight decay
+    :cvar sgd: Stochastic Gradient Descent optimizer
+    """
     adam = "adam"
     adamw = "adamw"
     sgd = "sgd"
@@ -25,12 +31,27 @@ class OptimizerType(Enum):
 
 @json_schema_type
 class DatasetFormat(Enum):
+    """Format of the training dataset.
+
+    :cvar instruct: Instruction-following format with prompt and completion
+    :cvar dialog: Multi-turn conversation format with messages
+    """
     instruct = "instruct"
     dialog = "dialog"
 
 
 @json_schema_type
 class DataConfig(BaseModel):
+    """Configuration for training data and data loading.
+
+    :param dataset_id: Unique identifier for the training dataset
+    :param batch_size: Number of samples per training batch
+    :param shuffle: Whether to shuffle the dataset during training
+    :param data_format: Format of the dataset (instruct or dialog)
+    :param validation_dataset_id: (Optional) Unique identifier for the validation dataset
+    :param packed: (Optional) Whether to pack multiple samples into a single sequence for efficiency
+    :param train_on_input: (Optional) Whether to compute loss on input tokens as well as output tokens
+    """
     dataset_id: str
     batch_size: int
     shuffle: bool
@@ -42,6 +63,13 @@ class DataConfig(BaseModel):
 
 @json_schema_type
 class OptimizerConfig(BaseModel):
+    """Configuration parameters for the optimization algorithm.
+
+    :param optimizer_type: Type of optimizer to use (adam, adamw, or sgd)
+    :param lr: Learning rate for the optimizer
+    :param weight_decay: Weight decay coefficient for regularization
+    :param num_warmup_steps: Number of steps for learning rate warmup
+    """
     optimizer_type: OptimizerType
     lr: float
     weight_decay: float
@@ -50,6 +78,13 @@ class OptimizerConfig(BaseModel):
 
 @json_schema_type
 class EfficiencyConfig(BaseModel):
+    """Configuration for memory and compute efficiency optimizations.
+
+    :param enable_activation_checkpointing: (Optional) Whether to use activation checkpointing to reduce memory usage
+    :param enable_activation_offloading: (Optional) Whether to offload activations to CPU to save GPU memory
+    :param memory_efficient_fsdp_wrap: (Optional) Whether to use memory-efficient FSDP wrapping
+    :param fsdp_cpu_offload: (Optional) Whether to offload FSDP parameters to CPU
+    """
     enable_activation_checkpointing: bool | None = False
     enable_activation_offloading: bool | None = False
     memory_efficient_fsdp_wrap: bool | None = False
@@ -58,6 +93,17 @@ class EfficiencyConfig(BaseModel):
 
 @json_schema_type
 class TrainingConfig(BaseModel):
+    """Comprehensive configuration for the training process.
+
+    :param n_epochs: Number of training epochs to run
+    :param max_steps_per_epoch: Maximum number of steps to run per epoch
+    :param gradient_accumulation_steps: Number of steps to accumulate gradients before updating
+    :param max_validation_steps: (Optional) Maximum number of validation steps per epoch
+    :param data_config: (Optional) Configuration for data loading and formatting
+    :param optimizer_config: (Optional) Configuration for the optimization algorithm
+    :param efficiency_config: (Optional) Configuration for memory and compute optimizations
+    :param dtype: (Optional) Data type for model parameters (bf16, fp16, fp32)
+    """
     n_epochs: int
     max_steps_per_epoch: int = 1
     gradient_accumulation_steps: int = 1
@@ -70,6 +116,17 @@ class TrainingConfig(BaseModel):
 
 @json_schema_type
 class LoraFinetuningConfig(BaseModel):
+    """Configuration for Low-Rank Adaptation (LoRA) fine-tuning.
+
+    :param type: Algorithm type identifier, always "LoRA"
+    :param lora_attn_modules: List of attention module names to apply LoRA to
+    :param apply_lora_to_mlp: Whether to apply LoRA to MLP layers
+    :param apply_lora_to_output: Whether to apply LoRA to output projection layers
+    :param rank: Rank of the LoRA adaptation (lower rank = fewer parameters)
+    :param alpha: LoRA scaling parameter that controls adaptation strength
+    :param use_dora: (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)
+    :param quantize_base: (Optional) Whether to quantize the base model weights
+    """
     type: Literal["LoRA"] = "LoRA"
     lora_attn_modules: list[str]
     apply_lora_to_mlp: bool
@@ -82,6 +139,12 @@ class LoraFinetuningConfig(BaseModel):
 
 @json_schema_type
 class QATFinetuningConfig(BaseModel):
+    """Configuration for Quantization-Aware Training (QAT) fine-tuning.
+
+    :param type: Algorithm type identifier, always "QAT"
+    :param quantizer_name: Name of the quantization algorithm to use
+    :param group_size: Size of groups for grouped quantization
+    """
     type: Literal["QAT"] = "QAT"
     quantizer_name: str
     group_size: int
@@ -93,7 +156,11 @@ register_schema(AlgorithmConfig, name="AlgorithmConfig")
 
 @json_schema_type
 class PostTrainingJobLogStream(BaseModel):
-    """Stream of logs from a finetuning job."""
+    """Stream of logs from a finetuning job.
+
+    :param job_uuid: Unique identifier for the training job
+    :param log_lines: List of log message strings from the training process
+    """
 
     job_uuid: str
     log_lines: list[str]
@@ -101,11 +168,22 @@ class PostTrainingJobLogStream(BaseModel):
 
 @json_schema_type
 class RLHFAlgorithm(Enum):
+    """Available reinforcement learning from human feedback algorithms.
+
+    :cvar dpo: Direct Preference Optimization algorithm
+    """
     dpo = "dpo"
 
 
 @json_schema_type
 class DPOAlignmentConfig(BaseModel):
+    """Configuration for Direct Preference Optimization (DPO) alignment.
+
+    :param reward_scale: Scaling factor for the reward signal
+    :param reward_clip: Maximum absolute value for reward clipping
+    :param epsilon: Small value added for numerical stability
+    :param gamma: Discount factor for future rewards
+    """
     reward_scale: float
     reward_clip: float
     epsilon: float
@@ -114,7 +192,19 @@ class DPOAlignmentConfig(BaseModel):
 
 @json_schema_type
 class PostTrainingRLHFRequest(BaseModel):
-    """Request to finetune a model."""
+    """Request to finetune a model using reinforcement learning from human feedback.
+
+    :param job_uuid: Unique identifier for the training job
+    :param finetuned_model: URL or path to the base model to fine-tune
+    :param dataset_id: Unique identifier for the training dataset
+    :param validation_dataset_id: Unique identifier for the validation dataset
+    :param algorithm: RLHF algorithm to use for training
+    :param algorithm_config: Configuration parameters for the RLHF algorithm
+    :param optimizer_config: Configuration parameters for the optimization algorithm
+    :param training_config: Configuration parameters for the training process
+    :param hyperparam_search_config: Configuration for hyperparameter search
+    :param logger_config: Configuration for training logging
+    """
 
     job_uuid: str
 
@@ -140,7 +230,16 @@ class PostTrainingJob(BaseModel):
 
 @json_schema_type
 class PostTrainingJobStatusResponse(BaseModel):
-    """Status of a finetuning job."""
+    """Status of a finetuning job.
+
+    :param job_uuid: Unique identifier for the training job
+    :param status: Current status of the training job
+    :param scheduled_at: (Optional) Timestamp when the job was scheduled
+    :param started_at: (Optional) Timestamp when the job execution began
+    :param completed_at: (Optional) Timestamp when the job finished, if completed
+    :param resources_allocated: (Optional) Information about computational resources allocated to the job
+    :param checkpoints: List of model checkpoints created during training
+    """
 
     job_uuid: str
     status: JobStatus
@@ -160,7 +259,11 @@ class ListPostTrainingJobsResponse(BaseModel):
 
 @json_schema_type
 class PostTrainingJobArtifactsResponse(BaseModel):
-    """Artifacts of a finetuning job."""
+    """Artifacts of a finetuning job.
+
+    :param job_uuid: Unique identifier for the training job
+    :param checkpoints: List of model checkpoints created during training
+    """
 
     job_uuid: str
     checkpoints: list[Checkpoint] = Field(default_factory=list)