From a9d8fdef906ff54a073d28e732c35b6950c8eba1 Mon Sep 17 00:00:00 2001 From: Sai Soundararaj Date: Tue, 1 Jul 2025 16:46:20 -0700 Subject: [PATCH] a --- api_update_plan.md | 4 +- docs/_static/llama-stack-spec.html | 167 ++++++++++++------ docs/_static/llama-stack-spec.yaml | 100 ++++++++++- .../apis/post_training/post_training.py | 111 +++++++++++- 4 files changed, 319 insertions(+), 63 deletions(-) diff --git a/api_update_plan.md b/api_update_plan.md index b59745fbc..ffda31b00 100644 --- a/api_update_plan.md +++ b/api_update_plan.md @@ -229,8 +229,8 @@ Before finalizing documentation, verify: [x] 8. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/vector_dbs/vector_dbs.py` - Vector database management [x] 9. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/files/files.py` - File management [x] 10. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/datasets/datasets.py` - Dataset management -11. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/datasetio/datasetio.py` - Dataset I/O operations -12. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/post_training/post_training.py` - Training and fine-tuning +[x] 11. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/datasetio/datasetio.py` - Dataset I/O operations +[x] 12. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/post_training/post_training.py` - Training and fine-tuning 13. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/eval/eval.py` - Evaluation framework 14. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring/scoring.py` - Scoring system 15. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring_functions/scoring_functions.py` - Scoring function definitions diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 4cebaec61..aa3fe644b 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -11252,13 +11252,15 @@ "type": "object", "properties": { "job_uuid": { - "type": "string" + "type": "string", + "description": "Unique identifier for the training job" }, "checkpoints": { "type": "array", "items": { "$ref": "#/components/schemas/Checkpoint" - } + }, + "description": "List of model checkpoints created during training" } }, "additionalProperties": false, @@ -11273,7 +11275,8 @@ "type": "object", "properties": { "job_uuid": { - "type": "string" + "type": "string", + "description": "Unique identifier for the training job" }, "status": { "type": "string", @@ -11284,19 +11287,22 @@ "scheduled", "cancelled" ], - "title": "JobStatus" + "description": "Current status of the training job" }, "scheduled_at": { "type": "string", - "format": "date-time" + "format": "date-time", + "description": "(Optional) Timestamp when the job was scheduled" }, "started_at": { "type": "string", - "format": "date-time" + "format": "date-time", + "description": "(Optional) Timestamp when the job execution began" }, "completed_at": { "type": "string", - "format": "date-time" + "format": "date-time", + "description": "(Optional) Timestamp when the job finished, if completed" }, "resources_allocated": { "type": "object", @@ -11321,13 +11327,15 @@ "type": "object" } ] - } + }, + "description": "(Optional) Information about computational resources allocated to the job" }, "checkpoints": { "type": "array", "items": { "$ref": "#/components/schemas/Checkpoint" - } + }, + "description": "List of model checkpoints created during training" } }, "additionalProperties": false, @@ -14644,16 +14652,20 @@ "type": "object", "properties": { "reward_scale": { - "type": "number" + "type": "number", + "description": "Scaling factor for the reward signal" }, "reward_clip": { - "type": "number" + "type": "number", + "description": "Maximum absolute value for reward clipping" }, "epsilon": { - "type": "number" + "type": "number", + "description": "Small value added for numerical stability" }, "gamma": { - "type": "number" + "type": "number", + "description": "Discount factor for future rewards" } }, "additionalProperties": false, @@ -14663,33 +14675,41 @@ "epsilon", "gamma" ], - "title": "DPOAlignmentConfig" + "title": "DPOAlignmentConfig", + "description": "Configuration for Direct Preference Optimization (DPO) alignment." }, "DataConfig": { "type": "object", "properties": { "dataset_id": { - "type": "string" + "type": "string", + "description": "Unique identifier for the training dataset" }, "batch_size": { - "type": "integer" + "type": "integer", + "description": "Number of samples per training batch" }, "shuffle": { - "type": "boolean" + "type": "boolean", + "description": "Whether to shuffle the dataset during training" }, "data_format": { - "$ref": "#/components/schemas/DatasetFormat" + "$ref": "#/components/schemas/DatasetFormat", + "description": "Format of the dataset (instruct or dialog)" }, "validation_dataset_id": { - "type": "string" + "type": "string", + "description": "(Optional) Unique identifier for the validation dataset" }, "packed": { "type": "boolean", - "default": false + "default": false, + "description": "(Optional) Whether to pack multiple samples into a single sequence for efficiency" }, "train_on_input": { "type": "boolean", - "default": false + "default": false, + "description": "(Optional) Whether to compute loss on input tokens as well as output tokens" } }, "additionalProperties": false, @@ -14699,7 +14719,8 @@ "shuffle", "data_format" ], - "title": "DataConfig" + "title": "DataConfig", + "description": "Configuration for training data and data loading." }, "DatasetFormat": { "type": "string", @@ -14707,45 +14728,55 @@ "instruct", "dialog" ], - "title": "DatasetFormat" + "title": "DatasetFormat", + "description": "Format of the training dataset." }, "EfficiencyConfig": { "type": "object", "properties": { "enable_activation_checkpointing": { "type": "boolean", - "default": false + "default": false, + "description": "(Optional) Whether to use activation checkpointing to reduce memory usage" }, "enable_activation_offloading": { "type": "boolean", - "default": false + "default": false, + "description": "(Optional) Whether to offload activations to CPU to save GPU memory" }, "memory_efficient_fsdp_wrap": { "type": "boolean", - "default": false + "default": false, + "description": "(Optional) Whether to use memory-efficient FSDP wrapping" }, "fsdp_cpu_offload": { "type": "boolean", - "default": false + "default": false, + "description": "(Optional) Whether to offload FSDP parameters to CPU" } }, "additionalProperties": false, - "title": "EfficiencyConfig" + "title": "EfficiencyConfig", + "description": "Configuration for memory and compute efficiency optimizations." }, "OptimizerConfig": { "type": "object", "properties": { "optimizer_type": { - "$ref": "#/components/schemas/OptimizerType" + "$ref": "#/components/schemas/OptimizerType", + "description": "Type of optimizer to use (adam, adamw, or sgd)" }, "lr": { - "type": "number" + "type": "number", + "description": "Learning rate for the optimizer" }, "weight_decay": { - "type": "number" + "type": "number", + "description": "Weight decay coefficient for regularization" }, "num_warmup_steps": { - "type": "integer" + "type": "integer", + "description": "Number of steps for learning rate warmup" } }, "additionalProperties": false, @@ -14755,7 +14786,8 @@ "weight_decay", "num_warmup_steps" ], - "title": "OptimizerConfig" + "title": "OptimizerConfig", + "description": "Configuration parameters for the optimization algorithm." }, "OptimizerType": { "type": "string", @@ -14764,38 +14796,47 @@ "adamw", "sgd" ], - "title": "OptimizerType" + "title": "OptimizerType", + "description": "Available optimizer algorithms for training." }, "TrainingConfig": { "type": "object", "properties": { "n_epochs": { - "type": "integer" + "type": "integer", + "description": "Number of training epochs to run" }, "max_steps_per_epoch": { "type": "integer", - "default": 1 + "default": 1, + "description": "Maximum number of steps to run per epoch" }, "gradient_accumulation_steps": { "type": "integer", - "default": 1 + "default": 1, + "description": "Number of steps to accumulate gradients before updating" }, "max_validation_steps": { "type": "integer", - "default": 1 + "default": 1, + "description": "(Optional) Maximum number of validation steps per epoch" }, "data_config": { - "$ref": "#/components/schemas/DataConfig" + "$ref": "#/components/schemas/DataConfig", + "description": "(Optional) Configuration for data loading and formatting" }, "optimizer_config": { - "$ref": "#/components/schemas/OptimizerConfig" + "$ref": "#/components/schemas/OptimizerConfig", + "description": "(Optional) Configuration for the optimization algorithm" }, "efficiency_config": { - "$ref": "#/components/schemas/EfficiencyConfig" + "$ref": "#/components/schemas/EfficiencyConfig", + "description": "(Optional) Configuration for memory and compute optimizations" }, "dtype": { "type": "string", - "default": "bf16" + "default": "bf16", + "description": "(Optional) Data type for model parameters (bf16, fp16, fp32)" } }, "additionalProperties": false, @@ -14804,7 +14845,8 @@ "max_steps_per_epoch", "gradient_accumulation_steps" ], - "title": "TrainingConfig" + "title": "TrainingConfig", + "description": "Comprehensive configuration for the training process." }, "PreferenceOptimizeRequest": { "type": "object", @@ -16101,33 +16143,41 @@ "type": { "type": "string", "const": "LoRA", - "default": "LoRA" + "default": "LoRA", + "description": "Algorithm type identifier, always \"LoRA\"" }, "lora_attn_modules": { "type": "array", "items": { "type": "string" - } + }, + "description": "List of attention module names to apply LoRA to" }, "apply_lora_to_mlp": { - "type": "boolean" + "type": "boolean", + "description": "Whether to apply LoRA to MLP layers" }, "apply_lora_to_output": { - "type": "boolean" + "type": "boolean", + "description": "Whether to apply LoRA to output projection layers" }, "rank": { - "type": "integer" + "type": "integer", + "description": "Rank of the LoRA adaptation (lower rank = fewer parameters)" }, "alpha": { - "type": "integer" + "type": "integer", + "description": "LoRA scaling parameter that controls adaptation strength" }, "use_dora": { "type": "boolean", - "default": false + "default": false, + "description": "(Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)" }, "quantize_base": { "type": "boolean", - "default": false + "default": false, + "description": "(Optional) Whether to quantize the base model weights" } }, "additionalProperties": false, @@ -16139,7 +16189,8 @@ "rank", "alpha" ], - "title": "LoraFinetuningConfig" + "title": "LoraFinetuningConfig", + "description": "Configuration for Low-Rank Adaptation (LoRA) fine-tuning." }, "QATFinetuningConfig": { "type": "object", @@ -16147,13 +16198,16 @@ "type": { "type": "string", "const": "QAT", - "default": "QAT" + "default": "QAT", + "description": "Algorithm type identifier, always \"QAT\"" }, "quantizer_name": { - "type": "string" + "type": "string", + "description": "Name of the quantization algorithm to use" }, "group_size": { - "type": "integer" + "type": "integer", + "description": "Size of groups for grouped quantization" } }, "additionalProperties": false, @@ -16162,7 +16216,8 @@ "quantizer_name", "group_size" ], - "title": "QATFinetuningConfig" + "title": "QATFinetuningConfig", + "description": "Configuration for Quantization-Aware Training (QAT) fine-tuning." }, "SupervisedFineTuneRequest": { "type": "object", diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index d154c6d76..ce7a7293f 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -8064,10 +8064,13 @@ components: properties: job_uuid: type: string + description: Unique identifier for the training job checkpoints: type: array items: $ref: '#/components/schemas/Checkpoint' + description: >- + List of model checkpoints created during training additionalProperties: false required: - job_uuid @@ -8079,6 +8082,7 @@ components: properties: job_uuid: type: string + description: Unique identifier for the training job status: type: string enum: @@ -8087,16 +8091,22 @@ components: - failed - scheduled - cancelled - title: JobStatus + description: Current status of the training job scheduled_at: type: string format: date-time + description: >- + (Optional) Timestamp when the job was scheduled started_at: type: string format: date-time + description: >- + (Optional) Timestamp when the job execution began completed_at: type: string format: date-time + description: >- + (Optional) Timestamp when the job finished, if completed resources_allocated: type: object additionalProperties: @@ -8107,10 +8117,15 @@ components: - type: string - type: array - type: object + description: >- + (Optional) Information about computational resources allocated to the + job checkpoints: type: array items: $ref: '#/components/schemas/Checkpoint' + description: >- + List of model checkpoints created during training additionalProperties: false required: - job_uuid @@ -10491,12 +10506,18 @@ components: properties: reward_scale: type: number + description: Scaling factor for the reward signal reward_clip: type: number + description: >- + Maximum absolute value for reward clipping epsilon: type: number + description: >- + Small value added for numerical stability gamma: type: number + description: Discount factor for future rewards additionalProperties: false required: - reward_scale @@ -10504,25 +10525,41 @@ components: - epsilon - gamma title: DPOAlignmentConfig + description: >- + Configuration for Direct Preference Optimization (DPO) alignment. DataConfig: type: object properties: dataset_id: type: string + description: >- + Unique identifier for the training dataset batch_size: type: integer + description: Number of samples per training batch shuffle: type: boolean + description: >- + Whether to shuffle the dataset during training data_format: $ref: '#/components/schemas/DatasetFormat' + description: >- + Format of the dataset (instruct or dialog) validation_dataset_id: type: string + description: >- + (Optional) Unique identifier for the validation dataset packed: type: boolean default: false + description: >- + (Optional) Whether to pack multiple samples into a single sequence for + efficiency train_on_input: type: boolean default: false + description: >- + (Optional) Whether to compute loss on input tokens as well as output tokens additionalProperties: false required: - dataset_id @@ -10530,40 +10567,59 @@ components: - shuffle - data_format title: DataConfig + description: >- + Configuration for training data and data loading. DatasetFormat: type: string enum: - instruct - dialog title: DatasetFormat + description: Format of the training dataset. EfficiencyConfig: type: object properties: enable_activation_checkpointing: type: boolean default: false + description: >- + (Optional) Whether to use activation checkpointing to reduce memory usage enable_activation_offloading: type: boolean default: false + description: >- + (Optional) Whether to offload activations to CPU to save GPU memory memory_efficient_fsdp_wrap: type: boolean default: false + description: >- + (Optional) Whether to use memory-efficient FSDP wrapping fsdp_cpu_offload: type: boolean default: false + description: >- + (Optional) Whether to offload FSDP parameters to CPU additionalProperties: false title: EfficiencyConfig + description: >- + Configuration for memory and compute efficiency optimizations. OptimizerConfig: type: object properties: optimizer_type: $ref: '#/components/schemas/OptimizerType' + description: >- + Type of optimizer to use (adam, adamw, or sgd) lr: type: number + description: Learning rate for the optimizer weight_decay: type: number + description: >- + Weight decay coefficient for regularization num_warmup_steps: type: integer + description: Number of steps for learning rate warmup additionalProperties: false required: - optimizer_type @@ -10571,6 +10627,8 @@ components: - weight_decay - num_warmup_steps title: OptimizerConfig + description: >- + Configuration parameters for the optimization algorithm. OptimizerType: type: string enum: @@ -10578,35 +10636,53 @@ components: - adamw - sgd title: OptimizerType + description: >- + Available optimizer algorithms for training. TrainingConfig: type: object properties: n_epochs: type: integer + description: Number of training epochs to run max_steps_per_epoch: type: integer default: 1 + description: Maximum number of steps to run per epoch gradient_accumulation_steps: type: integer default: 1 + description: >- + Number of steps to accumulate gradients before updating max_validation_steps: type: integer default: 1 + description: >- + (Optional) Maximum number of validation steps per epoch data_config: $ref: '#/components/schemas/DataConfig' + description: >- + (Optional) Configuration for data loading and formatting optimizer_config: $ref: '#/components/schemas/OptimizerConfig' + description: >- + (Optional) Configuration for the optimization algorithm efficiency_config: $ref: '#/components/schemas/EfficiencyConfig' + description: >- + (Optional) Configuration for memory and compute optimizations dtype: type: string default: bf16 + description: >- + (Optional) Data type for model parameters (bf16, fp16, fp32) additionalProperties: false required: - n_epochs - max_steps_per_epoch - gradient_accumulation_steps title: TrainingConfig + description: >- + Comprehensive configuration for the training process. PreferenceOptimizeRequest: type: object properties: @@ -11535,24 +11611,38 @@ components: type: string const: LoRA default: LoRA + description: Algorithm type identifier, always "LoRA" lora_attn_modules: type: array items: type: string + description: >- + List of attention module names to apply LoRA to apply_lora_to_mlp: type: boolean + description: Whether to apply LoRA to MLP layers apply_lora_to_output: type: boolean + description: >- + Whether to apply LoRA to output projection layers rank: type: integer + description: >- + Rank of the LoRA adaptation (lower rank = fewer parameters) alpha: type: integer + description: >- + LoRA scaling parameter that controls adaptation strength use_dora: type: boolean default: false + description: >- + (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation) quantize_base: type: boolean default: false + description: >- + (Optional) Whether to quantize the base model weights additionalProperties: false required: - type @@ -11562,6 +11652,8 @@ components: - rank - alpha title: LoraFinetuningConfig + description: >- + Configuration for Low-Rank Adaptation (LoRA) fine-tuning. QATFinetuningConfig: type: object properties: @@ -11569,16 +11661,22 @@ components: type: string const: QAT default: QAT + description: Algorithm type identifier, always "QAT" quantizer_name: type: string + description: >- + Name of the quantization algorithm to use group_size: type: integer + description: Size of groups for grouped quantization additionalProperties: false required: - type - quantizer_name - group_size title: QATFinetuningConfig + description: >- + Configuration for Quantization-Aware Training (QAT) fine-tuning. SupervisedFineTuneRequest: type: object properties: diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py index b196c8a17..2a41428b8 100644 --- a/llama_stack/apis/post_training/post_training.py +++ b/llama_stack/apis/post_training/post_training.py @@ -18,6 +18,12 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho @json_schema_type class OptimizerType(Enum): + """Available optimizer algorithms for training. + + :cvar adam: Adaptive Moment Estimation optimizer + :cvar adamw: AdamW optimizer with weight decay + :cvar sgd: Stochastic Gradient Descent optimizer + """ adam = "adam" adamw = "adamw" sgd = "sgd" @@ -25,12 +31,27 @@ class OptimizerType(Enum): @json_schema_type class DatasetFormat(Enum): + """Format of the training dataset. + + :cvar instruct: Instruction-following format with prompt and completion + :cvar dialog: Multi-turn conversation format with messages + """ instruct = "instruct" dialog = "dialog" @json_schema_type class DataConfig(BaseModel): + """Configuration for training data and data loading. + + :param dataset_id: Unique identifier for the training dataset + :param batch_size: Number of samples per training batch + :param shuffle: Whether to shuffle the dataset during training + :param data_format: Format of the dataset (instruct or dialog) + :param validation_dataset_id: (Optional) Unique identifier for the validation dataset + :param packed: (Optional) Whether to pack multiple samples into a single sequence for efficiency + :param train_on_input: (Optional) Whether to compute loss on input tokens as well as output tokens + """ dataset_id: str batch_size: int shuffle: bool @@ -42,6 +63,13 @@ class DataConfig(BaseModel): @json_schema_type class OptimizerConfig(BaseModel): + """Configuration parameters for the optimization algorithm. + + :param optimizer_type: Type of optimizer to use (adam, adamw, or sgd) + :param lr: Learning rate for the optimizer + :param weight_decay: Weight decay coefficient for regularization + :param num_warmup_steps: Number of steps for learning rate warmup + """ optimizer_type: OptimizerType lr: float weight_decay: float @@ -50,6 +78,13 @@ class OptimizerConfig(BaseModel): @json_schema_type class EfficiencyConfig(BaseModel): + """Configuration for memory and compute efficiency optimizations. + + :param enable_activation_checkpointing: (Optional) Whether to use activation checkpointing to reduce memory usage + :param enable_activation_offloading: (Optional) Whether to offload activations to CPU to save GPU memory + :param memory_efficient_fsdp_wrap: (Optional) Whether to use memory-efficient FSDP wrapping + :param fsdp_cpu_offload: (Optional) Whether to offload FSDP parameters to CPU + """ enable_activation_checkpointing: bool | None = False enable_activation_offloading: bool | None = False memory_efficient_fsdp_wrap: bool | None = False @@ -58,6 +93,17 @@ class EfficiencyConfig(BaseModel): @json_schema_type class TrainingConfig(BaseModel): + """Comprehensive configuration for the training process. + + :param n_epochs: Number of training epochs to run + :param max_steps_per_epoch: Maximum number of steps to run per epoch + :param gradient_accumulation_steps: Number of steps to accumulate gradients before updating + :param max_validation_steps: (Optional) Maximum number of validation steps per epoch + :param data_config: (Optional) Configuration for data loading and formatting + :param optimizer_config: (Optional) Configuration for the optimization algorithm + :param efficiency_config: (Optional) Configuration for memory and compute optimizations + :param dtype: (Optional) Data type for model parameters (bf16, fp16, fp32) + """ n_epochs: int max_steps_per_epoch: int = 1 gradient_accumulation_steps: int = 1 @@ -70,6 +116,17 @@ class TrainingConfig(BaseModel): @json_schema_type class LoraFinetuningConfig(BaseModel): + """Configuration for Low-Rank Adaptation (LoRA) fine-tuning. + + :param type: Algorithm type identifier, always "LoRA" + :param lora_attn_modules: List of attention module names to apply LoRA to + :param apply_lora_to_mlp: Whether to apply LoRA to MLP layers + :param apply_lora_to_output: Whether to apply LoRA to output projection layers + :param rank: Rank of the LoRA adaptation (lower rank = fewer parameters) + :param alpha: LoRA scaling parameter that controls adaptation strength + :param use_dora: (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation) + :param quantize_base: (Optional) Whether to quantize the base model weights + """ type: Literal["LoRA"] = "LoRA" lora_attn_modules: list[str] apply_lora_to_mlp: bool @@ -82,6 +139,12 @@ class LoraFinetuningConfig(BaseModel): @json_schema_type class QATFinetuningConfig(BaseModel): + """Configuration for Quantization-Aware Training (QAT) fine-tuning. + + :param type: Algorithm type identifier, always "QAT" + :param quantizer_name: Name of the quantization algorithm to use + :param group_size: Size of groups for grouped quantization + """ type: Literal["QAT"] = "QAT" quantizer_name: str group_size: int @@ -93,7 +156,11 @@ register_schema(AlgorithmConfig, name="AlgorithmConfig") @json_schema_type class PostTrainingJobLogStream(BaseModel): - """Stream of logs from a finetuning job.""" + """Stream of logs from a finetuning job. + + :param job_uuid: Unique identifier for the training job + :param log_lines: List of log message strings from the training process + """ job_uuid: str log_lines: list[str] @@ -101,11 +168,22 @@ class PostTrainingJobLogStream(BaseModel): @json_schema_type class RLHFAlgorithm(Enum): + """Available reinforcement learning from human feedback algorithms. + + :cvar dpo: Direct Preference Optimization algorithm + """ dpo = "dpo" @json_schema_type class DPOAlignmentConfig(BaseModel): + """Configuration for Direct Preference Optimization (DPO) alignment. + + :param reward_scale: Scaling factor for the reward signal + :param reward_clip: Maximum absolute value for reward clipping + :param epsilon: Small value added for numerical stability + :param gamma: Discount factor for future rewards + """ reward_scale: float reward_clip: float epsilon: float @@ -114,7 +192,19 @@ class DPOAlignmentConfig(BaseModel): @json_schema_type class PostTrainingRLHFRequest(BaseModel): - """Request to finetune a model.""" + """Request to finetune a model using reinforcement learning from human feedback. + + :param job_uuid: Unique identifier for the training job + :param finetuned_model: URL or path to the base model to fine-tune + :param dataset_id: Unique identifier for the training dataset + :param validation_dataset_id: Unique identifier for the validation dataset + :param algorithm: RLHF algorithm to use for training + :param algorithm_config: Configuration parameters for the RLHF algorithm + :param optimizer_config: Configuration parameters for the optimization algorithm + :param training_config: Configuration parameters for the training process + :param hyperparam_search_config: Configuration for hyperparameter search + :param logger_config: Configuration for training logging + """ job_uuid: str @@ -140,7 +230,16 @@ class PostTrainingJob(BaseModel): @json_schema_type class PostTrainingJobStatusResponse(BaseModel): - """Status of a finetuning job.""" + """Status of a finetuning job. + + :param job_uuid: Unique identifier for the training job + :param status: Current status of the training job + :param scheduled_at: (Optional) Timestamp when the job was scheduled + :param started_at: (Optional) Timestamp when the job execution began + :param completed_at: (Optional) Timestamp when the job finished, if completed + :param resources_allocated: (Optional) Information about computational resources allocated to the job + :param checkpoints: List of model checkpoints created during training + """ job_uuid: str status: JobStatus @@ -160,7 +259,11 @@ class ListPostTrainingJobsResponse(BaseModel): @json_schema_type class PostTrainingJobArtifactsResponse(BaseModel): - """Artifacts of a finetuning job.""" + """Artifacts of a finetuning job. + + :param job_uuid: Unique identifier for the training job + :param checkpoints: List of model checkpoints created during training + """ job_uuid: str checkpoints: list[Checkpoint] = Field(default_factory=list)