a

2025-12-23 08:22:28 +00:00 · 2025-07-01 16:46:20 -07:00 · 2025-07-01 16:46:20 -07:00 · a9d8fdef90
commit a9d8fdef90
parent b1b93088c5
4 changed files with 319 additions and 63 deletions
--- a/api_update_plan.md
+++ b/api_update_plan.md
@@ -229,8 +229,8 @@ Before finalizing documentation, verify:
 [x] 8. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/vector_dbs/vector_dbs.py` - Vector database management
 [x] 9. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/files/files.py` - File management
 [x] 10. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/datasets/datasets.py` - Dataset management
-11. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/datasetio/datasetio.py` - Dataset I/O operations
+[x] 11. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/datasetio/datasetio.py` - Dataset I/O operations
-12. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/post_training/post_training.py` - Training and fine-tuning
+[x] 12. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/post_training/post_training.py` - Training and fine-tuning
 13. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/eval/eval.py` - Evaluation framework
 14. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring/scoring.py` - Scoring system
 15. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring_functions/scoring_functions.py` - Scoring function definitions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -11252,13 +11252,15 @@
                "type": "object",
                "properties": {
                    "job_uuid": {
-                        "type": "string"
+                        "type": "string",
                        "description": "Unique identifier for the training job"
                    },
                    "checkpoints": {
                        "type": "array",
                        "items": {
                            "$ref": "#/components/schemas/Checkpoint"
-                        }
+                        },
                        "description": "List of model checkpoints created during training"
                    }
                },
                "additionalProperties": false,
@@ -11273,7 +11275,8 @@
                "type": "object",
                "properties": {
                    "job_uuid": {
-                        "type": "string"
+                        "type": "string",
                        "description": "Unique identifier for the training job"
                    },
                    "status": {
                        "type": "string",
@@ -11284,19 +11287,22 @@
                            "scheduled",
                            "cancelled"
                        ],
-                        "title": "JobStatus"
+                        "description": "Current status of the training job"
                    },
                    "scheduled_at": {
                        "type": "string",
-                        "format": "date-time"
+                        "format": "date-time",
                        "description": "(Optional) Timestamp when the job was scheduled"
                    },
                    "started_at": {
                        "type": "string",
-                        "format": "date-time"
+                        "format": "date-time",
                        "description": "(Optional) Timestamp when the job execution began"
                    },
                    "completed_at": {
                        "type": "string",
-                        "format": "date-time"
+                        "format": "date-time",
                        "description": "(Optional) Timestamp when the job finished, if completed"
                    },
                    "resources_allocated": {
                        "type": "object",
@@ -11321,13 +11327,15 @@
                                    "type": "object"
                                }
                            ]
-                        }
+                        },
                        "description": "(Optional) Information about computational resources allocated to the job"
                    },
                    "checkpoints": {
                        "type": "array",
                        "items": {
                            "$ref": "#/components/schemas/Checkpoint"
-                        }
+                        },
                        "description": "List of model checkpoints created during training"
                    }
                },
                "additionalProperties": false,
@@ -14644,16 +14652,20 @@
                "type": "object",
                "properties": {
                    "reward_scale": {
-                        "type": "number"
+                        "type": "number",
                        "description": "Scaling factor for the reward signal"
                    },
                    "reward_clip": {
-                        "type": "number"
+                        "type": "number",
                        "description": "Maximum absolute value for reward clipping"
                    },
                    "epsilon": {
-                        "type": "number"
+                        "type": "number",
                        "description": "Small value added for numerical stability"
                    },
                    "gamma": {
-                        "type": "number"
+                        "type": "number",
                        "description": "Discount factor for future rewards"
                    }
                },
                "additionalProperties": false,
@@ -14663,33 +14675,41 @@
                    "epsilon",
                    "gamma"
                ],
-                "title": "DPOAlignmentConfig"
+                "title": "DPOAlignmentConfig",
                "description": "Configuration for Direct Preference Optimization (DPO) alignment."
            },
            "DataConfig": {
                "type": "object",
                "properties": {
                    "dataset_id": {
-                        "type": "string"
+                        "type": "string",
                        "description": "Unique identifier for the training dataset"
                    },
                    "batch_size": {
-                        "type": "integer"
+                        "type": "integer",
                        "description": "Number of samples per training batch"
                    },
                    "shuffle": {
-                        "type": "boolean"
+                        "type": "boolean",
                        "description": "Whether to shuffle the dataset during training"
                    },
                    "data_format": {
-                        "$ref": "#/components/schemas/DatasetFormat"
+                        "$ref": "#/components/schemas/DatasetFormat",
                        "description": "Format of the dataset (instruct or dialog)"
                    },
                    "validation_dataset_id": {
-                        "type": "string"
+                        "type": "string",
                        "description": "(Optional) Unique identifier for the validation dataset"
                    },
                    "packed": {
                        "type": "boolean",
-                        "default": false
+                        "default": false,
                        "description": "(Optional) Whether to pack multiple samples into a single sequence for efficiency"
                    },
                    "train_on_input": {
                        "type": "boolean",
-                        "default": false
+                        "default": false,
                        "description": "(Optional) Whether to compute loss on input tokens as well as output tokens"
                    }
                },
                "additionalProperties": false,
@@ -14699,7 +14719,8 @@
                    "shuffle",
                    "data_format"
                ],
-                "title": "DataConfig"
+                "title": "DataConfig",
                "description": "Configuration for training data and data loading."
            },
            "DatasetFormat": {
                "type": "string",
@@ -14707,45 +14728,55 @@
                    "instruct",
                    "dialog"
                ],
-                "title": "DatasetFormat"
+                "title": "DatasetFormat",
                "description": "Format of the training dataset."
            },
            "EfficiencyConfig": {
                "type": "object",
                "properties": {
                    "enable_activation_checkpointing": {
                        "type": "boolean",
-                        "default": false
+                        "default": false,
                        "description": "(Optional) Whether to use activation checkpointing to reduce memory usage"
                    },
                    "enable_activation_offloading": {
                        "type": "boolean",
-                        "default": false
+                        "default": false,
                        "description": "(Optional) Whether to offload activations to CPU to save GPU memory"
                    },
                    "memory_efficient_fsdp_wrap": {
                        "type": "boolean",
-                        "default": false
+                        "default": false,
                        "description": "(Optional) Whether to use memory-efficient FSDP wrapping"
                    },
                    "fsdp_cpu_offload": {
                        "type": "boolean",
-                        "default": false
+                        "default": false,
                        "description": "(Optional) Whether to offload FSDP parameters to CPU"
                    }
                },
                "additionalProperties": false,
-                "title": "EfficiencyConfig"
+                "title": "EfficiencyConfig",
                "description": "Configuration for memory and compute efficiency optimizations."
            },
            "OptimizerConfig": {
                "type": "object",
                "properties": {
                    "optimizer_type": {
-                        "$ref": "#/components/schemas/OptimizerType"
+                        "$ref": "#/components/schemas/OptimizerType",
                        "description": "Type of optimizer to use (adam, adamw, or sgd)"
                    },
                    "lr": {
-                        "type": "number"
+                        "type": "number",
                        "description": "Learning rate for the optimizer"
                    },
                    "weight_decay": {
-                        "type": "number"
+                        "type": "number",
                        "description": "Weight decay coefficient for regularization"
                    },
                    "num_warmup_steps": {
-                        "type": "integer"
+                        "type": "integer",
                        "description": "Number of steps for learning rate warmup"
                    }
                },
                "additionalProperties": false,
@@ -14755,7 +14786,8 @@
                    "weight_decay",
                    "num_warmup_steps"
                ],
-                "title": "OptimizerConfig"
+                "title": "OptimizerConfig",
                "description": "Configuration parameters for the optimization algorithm."
            },
            "OptimizerType": {
                "type": "string",
@@ -14764,38 +14796,47 @@
                    "adamw",
                    "sgd"
                ],
-                "title": "OptimizerType"
+                "title": "OptimizerType",
                "description": "Available optimizer algorithms for training."
            },
            "TrainingConfig": {
                "type": "object",
                "properties": {
                    "n_epochs": {
-                        "type": "integer"
+                        "type": "integer",
                        "description": "Number of training epochs to run"
                    },
                    "max_steps_per_epoch": {
                        "type": "integer",
-                        "default": 1
+                        "default": 1,
                        "description": "Maximum number of steps to run per epoch"
                    },
                    "gradient_accumulation_steps": {
                        "type": "integer",
-                        "default": 1
+                        "default": 1,
                        "description": "Number of steps to accumulate gradients before updating"
                    },
                    "max_validation_steps": {
                        "type": "integer",
-                        "default": 1
+                        "default": 1,
                        "description": "(Optional) Maximum number of validation steps per epoch"
                    },
                    "data_config": {
-                        "$ref": "#/components/schemas/DataConfig"
+                        "$ref": "#/components/schemas/DataConfig",
                        "description": "(Optional) Configuration for data loading and formatting"
                    },
                    "optimizer_config": {
-                        "$ref": "#/components/schemas/OptimizerConfig"
+                        "$ref": "#/components/schemas/OptimizerConfig",
                        "description": "(Optional) Configuration for the optimization algorithm"
                    },
                    "efficiency_config": {
-                        "$ref": "#/components/schemas/EfficiencyConfig"
+                        "$ref": "#/components/schemas/EfficiencyConfig",
                        "description": "(Optional) Configuration for memory and compute optimizations"
                    },
                    "dtype": {
                        "type": "string",
-                        "default": "bf16"
+                        "default": "bf16",
                        "description": "(Optional) Data type for model parameters (bf16, fp16, fp32)"
                    }
                },
                "additionalProperties": false,
@@ -14804,7 +14845,8 @@
                    "max_steps_per_epoch",
                    "gradient_accumulation_steps"
                ],
-                "title": "TrainingConfig"
+                "title": "TrainingConfig",
                "description": "Comprehensive configuration for the training process."
            },
            "PreferenceOptimizeRequest": {
                "type": "object",
@@ -16101,33 +16143,41 @@
                    "type": {
                        "type": "string",
                        "const": "LoRA",
-                        "default": "LoRA"
+                        "default": "LoRA",
                        "description": "Algorithm type identifier, always \"LoRA\""
                    },
                    "lora_attn_modules": {
                        "type": "array",
                        "items": {
                            "type": "string"
-                        }
+                        },
                        "description": "List of attention module names to apply LoRA to"
                    },
                    "apply_lora_to_mlp": {
-                        "type": "boolean"
+                        "type": "boolean",
                        "description": "Whether to apply LoRA to MLP layers"
                    },
                    "apply_lora_to_output": {
-                        "type": "boolean"
+                        "type": "boolean",
                        "description": "Whether to apply LoRA to output projection layers"
                    },
                    "rank": {
-                        "type": "integer"
+                        "type": "integer",
                        "description": "Rank of the LoRA adaptation (lower rank = fewer parameters)"
                    },
                    "alpha": {
-                        "type": "integer"
+                        "type": "integer",
                        "description": "LoRA scaling parameter that controls adaptation strength"
                    },
                    "use_dora": {
                        "type": "boolean",
-                        "default": false
+                        "default": false,
                        "description": "(Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)"
                    },
                    "quantize_base": {
                        "type": "boolean",
-                        "default": false
+                        "default": false,
                        "description": "(Optional) Whether to quantize the base model weights"
                    }
                },
                "additionalProperties": false,
@@ -16139,7 +16189,8 @@
                    "rank",
                    "alpha"
                ],
-                "title": "LoraFinetuningConfig"
+                "title": "LoraFinetuningConfig",
                "description": "Configuration for Low-Rank Adaptation (LoRA) fine-tuning."
            },
            "QATFinetuningConfig": {
                "type": "object",
@@ -16147,13 +16198,16 @@
                    "type": {
                        "type": "string",
                        "const": "QAT",
-                        "default": "QAT"
+                        "default": "QAT",
                        "description": "Algorithm type identifier, always \"QAT\""
                    },
                    "quantizer_name": {
-                        "type": "string"
+                        "type": "string",
                        "description": "Name of the quantization algorithm to use"
                    },
                    "group_size": {
-                        "type": "integer"
+                        "type": "integer",
                        "description": "Size of groups for grouped quantization"
                    }
                },
                "additionalProperties": false,
@@ -16162,7 +16216,8 @@
                    "quantizer_name",
                    "group_size"
                ],
-                "title": "QATFinetuningConfig"
+                "title": "QATFinetuningConfig",
                "description": "Configuration for Quantization-Aware Training (QAT) fine-tuning."
            },
            "SupervisedFineTuneRequest": {
                "type": "object",
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -8064,10 +8064,13 @@ components:
      properties:
        job_uuid:
          type: string
          description: Unique identifier for the training job
        checkpoints:
          type: array
          items:
            $ref: '#/components/schemas/Checkpoint'
          description: >-
            List of model checkpoints created during training
      additionalProperties: false
      required:
        - job_uuid
@@ -8079,6 +8082,7 @@ components:
      properties:
        job_uuid:
          type: string
          description: Unique identifier for the training job
        status:
          type: string
          enum:
@@ -8087,16 +8091,22 @@ components:
            - failed
            - scheduled
            - cancelled
-          title: JobStatus
+          description: Current status of the training job
        scheduled_at:
          type: string
          format: date-time
          description: >-
            (Optional) Timestamp when the job was scheduled
        started_at:
          type: string
          format: date-time
          description: >-
            (Optional) Timestamp when the job execution began
        completed_at:
          type: string
          format: date-time
          description: >-
            (Optional) Timestamp when the job finished, if completed
        resources_allocated:
          type: object
          additionalProperties:
@@ -8107,10 +8117,15 @@ components:
              - type: string
              - type: array
              - type: object
          description: >-
            (Optional) Information about computational resources allocated to the
            job
        checkpoints:
          type: array
          items:
            $ref: '#/components/schemas/Checkpoint'
          description: >-
            List of model checkpoints created during training
      additionalProperties: false
      required:
        - job_uuid
@@ -10491,12 +10506,18 @@ components:
      properties:
        reward_scale:
          type: number
          description: Scaling factor for the reward signal
        reward_clip:
          type: number
          description: >-
            Maximum absolute value for reward clipping
        epsilon:
          type: number
          description: >-
            Small value added for numerical stability
        gamma:
          type: number
          description: Discount factor for future rewards
      additionalProperties: false
      required:
        - reward_scale
@@ -10504,25 +10525,41 @@ components:
        - epsilon
        - gamma
      title: DPOAlignmentConfig
      description: >-
        Configuration for Direct Preference Optimization (DPO) alignment.
    DataConfig:
      type: object
      properties:
        dataset_id:
          type: string
          description: >-
            Unique identifier for the training dataset
        batch_size:
          type: integer
          description: Number of samples per training batch
        shuffle:
          type: boolean
          description: >-
            Whether to shuffle the dataset during training
        data_format:
          $ref: '#/components/schemas/DatasetFormat'
          description: >-
            Format of the dataset (instruct or dialog)
        validation_dataset_id:
          type: string
          description: >-
            (Optional) Unique identifier for the validation dataset
        packed:
          type: boolean
          default: false
          description: >-
            (Optional) Whether to pack multiple samples into a single sequence for
            efficiency
        train_on_input:
          type: boolean
          default: false
          description: >-
            (Optional) Whether to compute loss on input tokens as well as output tokens
      additionalProperties: false
      required:
        - dataset_id
@@ -10530,40 +10567,59 @@ components:
        - shuffle
        - data_format
      title: DataConfig
      description: >-
        Configuration for training data and data loading.
    DatasetFormat:
      type: string
      enum:
        - instruct
        - dialog
      title: DatasetFormat
      description: Format of the training dataset.
    EfficiencyConfig:
      type: object
      properties:
        enable_activation_checkpointing:
          type: boolean
          default: false
          description: >-
            (Optional) Whether to use activation checkpointing to reduce memory usage
        enable_activation_offloading:
          type: boolean
          default: false
          description: >-
            (Optional) Whether to offload activations to CPU to save GPU memory
        memory_efficient_fsdp_wrap:
          type: boolean
          default: false
          description: >-
            (Optional) Whether to use memory-efficient FSDP wrapping
        fsdp_cpu_offload:
          type: boolean
          default: false
          description: >-
            (Optional) Whether to offload FSDP parameters to CPU
      additionalProperties: false
      title: EfficiencyConfig
      description: >-
        Configuration for memory and compute efficiency optimizations.
    OptimizerConfig:
      type: object
      properties:
        optimizer_type:
          $ref: '#/components/schemas/OptimizerType'
          description: >-
            Type of optimizer to use (adam, adamw, or sgd)
        lr:
          type: number
          description: Learning rate for the optimizer
        weight_decay:
          type: number
          description: >-
            Weight decay coefficient for regularization
        num_warmup_steps:
          type: integer
          description: Number of steps for learning rate warmup
      additionalProperties: false
      required:
        - optimizer_type
@@ -10571,6 +10627,8 @@ components:
        - weight_decay
        - num_warmup_steps
      title: OptimizerConfig
      description: >-
        Configuration parameters for the optimization algorithm.
    OptimizerType:
      type: string
      enum:
@@ -10578,35 +10636,53 @@ components:
        - adamw
        - sgd
      title: OptimizerType
      description: >-
        Available optimizer algorithms for training.
    TrainingConfig:
      type: object
      properties:
        n_epochs:
          type: integer
          description: Number of training epochs to run
        max_steps_per_epoch:
          type: integer
          default: 1
          description: Maximum number of steps to run per epoch
        gradient_accumulation_steps:
          type: integer
          default: 1
          description: >-
            Number of steps to accumulate gradients before updating
        max_validation_steps:
          type: integer
          default: 1
          description: >-
            (Optional) Maximum number of validation steps per epoch
        data_config:
          $ref: '#/components/schemas/DataConfig'
          description: >-
            (Optional) Configuration for data loading and formatting
        optimizer_config:
          $ref: '#/components/schemas/OptimizerConfig'
          description: >-
            (Optional) Configuration for the optimization algorithm
        efficiency_config:
          $ref: '#/components/schemas/EfficiencyConfig'
          description: >-
            (Optional) Configuration for memory and compute optimizations
        dtype:
          type: string
          default: bf16
          description: >-
            (Optional) Data type for model parameters (bf16, fp16, fp32)
      additionalProperties: false
      required:
        - n_epochs
        - max_steps_per_epoch
        - gradient_accumulation_steps
      title: TrainingConfig
      description: >-
        Comprehensive configuration for the training process.
    PreferenceOptimizeRequest:
      type: object
      properties:
@@ -11535,24 +11611,38 @@ components:
          type: string
          const: LoRA
          default: LoRA
          description: Algorithm type identifier, always "LoRA"
        lora_attn_modules:
          type: array
          items:
            type: string
          description: >-
            List of attention module names to apply LoRA to
        apply_lora_to_mlp:
          type: boolean
          description: Whether to apply LoRA to MLP layers
        apply_lora_to_output:
          type: boolean
          description: >-
            Whether to apply LoRA to output projection layers
        rank:
          type: integer
          description: >-
            Rank of the LoRA adaptation (lower rank = fewer parameters)
        alpha:
          type: integer
          description: >-
            LoRA scaling parameter that controls adaptation strength
        use_dora:
          type: boolean
          default: false
          description: >-
            (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)
        quantize_base:
          type: boolean
          default: false
          description: >-
            (Optional) Whether to quantize the base model weights
      additionalProperties: false
      required:
        - type
@@ -11562,6 +11652,8 @@ components:
        - rank
        - alpha
      title: LoraFinetuningConfig
      description: >-
        Configuration for Low-Rank Adaptation (LoRA) fine-tuning.
    QATFinetuningConfig:
      type: object
      properties:
@@ -11569,16 +11661,22 @@ components:
          type: string
          const: QAT
          default: QAT
          description: Algorithm type identifier, always "QAT"
        quantizer_name:
          type: string
          description: >-
            Name of the quantization algorithm to use
        group_size:
          type: integer
          description: Size of groups for grouped quantization
      additionalProperties: false
      required:
        - type
        - quantizer_name
        - group_size
      title: QATFinetuningConfig
      description: >-
        Configuration for Quantization-Aware Training (QAT) fine-tuning.
    SupervisedFineTuneRequest:
      type: object
      properties:
--- a/llama_stack/apis/post_training/post_training.py
+++ b/llama_stack/apis/post_training/post_training.py
@@ -18,6 +18,12 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
@json_schema_type
 class OptimizerType(Enum):
    """Available optimizer algorithms for training.
    :cvar adam: Adaptive Moment Estimation optimizer
    :cvar adamw: AdamW optimizer with weight decay
    :cvar sgd: Stochastic Gradient Descent optimizer
    """
    adam = "adam"
    adamw = "adamw"
    sgd = "sgd"
@@ -25,12 +31,27 @@ class OptimizerType(Enum):
@json_schema_type
 class DatasetFormat(Enum):
    """Format of the training dataset.
    :cvar instruct: Instruction-following format with prompt and completion
    :cvar dialog: Multi-turn conversation format with messages
    """
    instruct = "instruct"
    dialog = "dialog"
@json_schema_type
 class DataConfig(BaseModel):
    """Configuration for training data and data loading.
    :param dataset_id: Unique identifier for the training dataset
    :param batch_size: Number of samples per training batch
    :param shuffle: Whether to shuffle the dataset during training
    :param data_format: Format of the dataset (instruct or dialog)
    :param validation_dataset_id: (Optional) Unique identifier for the validation dataset
    :param packed: (Optional) Whether to pack multiple samples into a single sequence for efficiency
    :param train_on_input: (Optional) Whether to compute loss on input tokens as well as output tokens
    """
    dataset_id: str
    batch_size: int
    shuffle: bool
@@ -42,6 +63,13 @@ class DataConfig(BaseModel):
@json_schema_type
 class OptimizerConfig(BaseModel):
    """Configuration parameters for the optimization algorithm.
    :param optimizer_type: Type of optimizer to use (adam, adamw, or sgd)
    :param lr: Learning rate for the optimizer
    :param weight_decay: Weight decay coefficient for regularization
    :param num_warmup_steps: Number of steps for learning rate warmup
    """
    optimizer_type: OptimizerType
    lr: float
    weight_decay: float
@@ -50,6 +78,13 @@ class OptimizerConfig(BaseModel):
@json_schema_type
 class EfficiencyConfig(BaseModel):
    """Configuration for memory and compute efficiency optimizations.
    :param enable_activation_checkpointing: (Optional) Whether to use activation checkpointing to reduce memory usage
    :param enable_activation_offloading: (Optional) Whether to offload activations to CPU to save GPU memory
    :param memory_efficient_fsdp_wrap: (Optional) Whether to use memory-efficient FSDP wrapping
    :param fsdp_cpu_offload: (Optional) Whether to offload FSDP parameters to CPU
    """
    enable_activation_checkpointing: bool | None = False
    enable_activation_offloading: bool | None = False
    memory_efficient_fsdp_wrap: bool | None = False
@@ -58,6 +93,17 @@ class EfficiencyConfig(BaseModel):
@json_schema_type
 class TrainingConfig(BaseModel):
    """Comprehensive configuration for the training process.
    :param n_epochs: Number of training epochs to run
    :param max_steps_per_epoch: Maximum number of steps to run per epoch
    :param gradient_accumulation_steps: Number of steps to accumulate gradients before updating
    :param max_validation_steps: (Optional) Maximum number of validation steps per epoch
    :param data_config: (Optional) Configuration for data loading and formatting
    :param optimizer_config: (Optional) Configuration for the optimization algorithm
    :param efficiency_config: (Optional) Configuration for memory and compute optimizations
    :param dtype: (Optional) Data type for model parameters (bf16, fp16, fp32)
    """
    n_epochs: int
    max_steps_per_epoch: int = 1
    gradient_accumulation_steps: int = 1
@ -70,6 +116,17 @@ class TrainingConfig(BaseModel):
@json_schema_type
 class LoraFinetuningConfig(BaseModel):
    """Configuration for Low-Rank Adaptation (LoRA) fine-tuning.
    :param type: Algorithm type identifier, always "LoRA"
    :param lora_attn_modules: List of attention module names to apply LoRA to
    :param apply_lora_to_mlp: Whether to apply LoRA to MLP layers
    :param apply_lora_to_output: Whether to apply LoRA to output projection layers
    :param rank: Rank of the LoRA adaptation (lower rank = fewer parameters)
    :param alpha: LoRA scaling parameter that controls adaptation strength
    :param use_dora: (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)
    :param quantize_base: (Optional) Whether to quantize the base model weights
    """
    type: Literal["LoRA"] = "LoRA"
    lora_attn_modules: list[str]
    apply_lora_to_mlp: bool
@ -82,6 +139,12 @@ class LoraFinetuningConfig(BaseModel):
@json_schema_type
 class QATFinetuningConfig(BaseModel):
    """Configuration for Quantization-Aware Training (QAT) fine-tuning.
    :param type: Algorithm type identifier, always "QAT"
    :param quantizer_name: Name of the quantization algorithm to use
    :param group_size: Size of groups for grouped quantization
    """
    type: Literal["QAT"] = "QAT"
    quantizer_name: str
    group_size: int
@ -93,7 +156,11 @@ register_schema(AlgorithmConfig, name="AlgorithmConfig")
@json_schema_type
 class PostTrainingJobLogStream(BaseModel):
-    """Stream of logs from a finetuning job."""
+    """Stream of logs from a fine-tuning job.
    :param job_uuid: Unique identifier for the training job
    :param log_lines: List of log message strings from the training process
    """
    job_uuid: str
    log_lines: list[str]
@ -101,11 +168,22 @@ class PostTrainingJobLogStream(BaseModel):
@json_schema_type
 class RLHFAlgorithm(Enum):
    """Available reinforcement learning from human feedback algorithms.
    :cvar dpo: Direct Preference Optimization algorithm
    """
    dpo = "dpo"
@json_schema_type
 class DPOAlignmentConfig(BaseModel):
    """Configuration for Direct Preference Optimization (DPO) alignment.
    :param reward_scale: Scaling factor for the reward signal
    :param reward_clip: Maximum absolute value for reward clipping
    :param epsilon: Small value added for numerical stability
    :param gamma: Discount factor for future rewards
    """
    reward_scale: float
    reward_clip: float
    epsilon: float
@ -114,7 +192,19 @@ class DPOAlignmentConfig(BaseModel):
@json_schema_type
 class PostTrainingRLHFRequest(BaseModel):
-    """Request to finetune a model."""
+    """Request to fine-tune a model using reinforcement learning from human feedback.
    :param job_uuid: Unique identifier for the training job
    :param finetuned_model: URL or path to the base model to fine-tune
    :param dataset_id: Unique identifier for the training dataset
    :param validation_dataset_id: Unique identifier for the validation dataset
    :param algorithm: RLHF algorithm to use for training
    :param algorithm_config: Configuration parameters for the RLHF algorithm
    :param optimizer_config: Configuration parameters for the optimization algorithm
    :param training_config: Configuration parameters for the training process
    :param hyperparam_search_config: Configuration for hyperparameter search
    :param logger_config: Configuration for training logging
    """
    job_uuid: str
@ -140,7 +230,16 @@ class PostTrainingJob(BaseModel):
@json_schema_type
 class PostTrainingJobStatusResponse(BaseModel):
-    """Status of a finetuning job."""
+    """Status of a fine-tuning job.
    :param job_uuid: Unique identifier for the training job
    :param status: Current status of the training job
    :param scheduled_at: (Optional) Timestamp when the job was scheduled
    :param started_at: (Optional) Timestamp when the job execution began
    :param completed_at: (Optional) Timestamp when the job finished, if completed
    :param resources_allocated: (Optional) Information about computational resources allocated to the job
    :param checkpoints: List of model checkpoints created during training
    """
    job_uuid: str
    status: JobStatus
@ -160,7 +259,11 @@ class ListPostTrainingJobsResponse(BaseModel):
@json_schema_type
 class PostTrainingJobArtifactsResponse(BaseModel):
-    """Artifacts of a finetuning job."""
+    """Artifacts of a fine-tuning job.
    :param job_uuid: Unique identifier for the training job
    :param checkpoints: List of model checkpoints created during training
    """
    job_uuid: str
    checkpoints: list[Checkpoint] = Field(default_factory=list)