From 7b0f19fd2556be8d0abc18886ca94caad23eafb3 Mon Sep 17 00:00:00 2001
From: Nathan Weinberg
Date: Thu, 31 Jul 2025 11:45:29 -0400
Subject: [PATCH] chore: standardize missing training config error

Signed-off-by: Nathan Weinberg
---
 llama_stack/apis/common/errors.py         |  8 ++++++++
 .../recipes/finetune_single_device.py     | 17 +++++++++--------
 .../recipes/finetune_single_device_dpo.py | 17 +++++++++--------
 3 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/llama_stack/apis/common/errors.py b/llama_stack/apis/common/errors.py
index ec3d2b1ce..acf83e537 100644
--- a/llama_stack/apis/common/errors.py
+++ b/llama_stack/apis/common/errors.py
@@ -79,3 +79,11 @@ class ConflictError(ValueError):
 
     def __init__(self, message: str) -> None:
         super().__init__(message)
+
+
+class MissingTrainingConfigError(ValueError):
+    """raise when Llama Stack is missing configuration for training"""
+
+    def __init__(self, config_name: str) -> None:
+        message = f"'{config_name}' is required for training"
+        super().__init__(message)
diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
index d9ee3d2a8..da6ffdc4a 100644
--- a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
+++ b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
@@ -19,6 +19,7 @@ from transformers import (
 )
 from trl import SFTConfig, SFTTrainer
 
+from llama_stack.apis.common.errors import MissingTrainingConfigError
 from llama_stack.apis.datasetio import DatasetIO
 from llama_stack.apis.datasets import Datasets
 from llama_stack.apis.post_training import (
@@ -224,8 +225,8 @@ class HFFinetuningSingleDevice:
             tuple: (train_dataset, eval_dataset, tokenizer)
         """
         # Validate data config
-        if not config.data_config:
-            raise ValueError("DataConfig is required for training")
+        if config.data_config is None:
+            raise MissingTrainingConfigError("DataConfig")
 
         # Load dataset
         logger.info(f"Loading dataset: {config.data_config.dataset_id}")
@@ -300,8 +301,8 @@ class HFFinetuningSingleDevice:
             logger.info(f"Using custom learning rate: {lr}")
 
         # Validate data config
-        if not config.data_config:
-            raise ValueError("DataConfig is required for training")
+        if config.data_config is None:
+            raise MissingTrainingConfigError("DataConfig")
         data_config = config.data_config
 
         # Calculate steps and get save strategy
@@ -392,8 +393,8 @@ class HFFinetuningSingleDevice:
         train_dataset, eval_dataset, tokenizer = await self.load_dataset(model, config_obj, provider_config_obj)
 
         # Calculate steps per epoch
-        if not config_obj.data_config:
-            raise ValueError("DataConfig is required for training")
+        if config_obj.data_config is None:
+            raise MissingTrainingConfigError("DataConfig")
         steps_per_epoch = len(train_dataset) // config_obj.data_config.batch_size
 
         # Setup training arguments
@@ -475,8 +476,8 @@ class HFFinetuningSingleDevice:
         )
 
         # Validate data config
-        if not config.data_config:
-            raise ValueError("DataConfig is required for training")
+        if config.data_config is None:
+            raise MissingTrainingConfigError("DataConfig")
 
         # Train in a separate process
         logger.info("Starting training in separate process")
diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
index b39a24c66..4097e0810 100644
--- a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
+++ b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
@@ -16,6 +16,7 @@ from transformers import (
 )
 from trl import DPOConfig, DPOTrainer
 
+from llama_stack.apis.common.errors import MissingTrainingConfigError
 from llama_stack.apis.datasetio import DatasetIO
 from llama_stack.apis.datasets import Datasets
 from llama_stack.apis.post_training import (
@@ -204,8 +205,8 @@ class HFDPOAlignmentSingleDevice:
     ) -> tuple[Dataset, Dataset, AutoTokenizer]:
         """Load and prepare the dataset for DPO training."""
         # Validate data config
-        if not config.data_config:
-            raise ValueError("DataConfig is required for DPO training")
+        if config.data_config is None:
+            raise MissingTrainingConfigError("DataConfig")
 
         # Load dataset
         logger.info(f"Loading dataset: {config.data_config.dataset_id}")
@@ -266,8 +267,8 @@ class HFDPOAlignmentSingleDevice:
             logger.info(f"Using custom learning rate: {lr}")
 
         # Validate data config
-        if not config.data_config:
-            raise ValueError("DataConfig is required for training")
+        if config.data_config is None:
+            raise MissingTrainingConfigError("DataConfig")
         data_config = config.data_config
 
         # Calculate steps and get save strategy
@@ -356,8 +357,8 @@ class HFDPOAlignmentSingleDevice:
         train_dataset, eval_dataset, tokenizer = await self.load_dataset(model, config_obj, provider_config_obj)
 
         # Calculate steps per epoch
-        if not config_obj.data_config:
-            raise ValueError("DataConfig is required for training")
+        if config_obj.data_config is None:
+            raise MissingTrainingConfigError("DataConfig")
         steps_per_epoch = len(train_dataset) // config_obj.data_config.batch_size
 
         # Setup training arguments
@@ -441,8 +442,8 @@ class HFDPOAlignmentSingleDevice:
         }
 
         # Validate data config
-        if not config.data_config:
-            raise ValueError("DataConfig is required for training")
+        if config.data_config is None:
+            raise MissingTrainingConfigError("DataConfig")
 
         # Train in a separate process
         logger.info("Starting DPO training in separate process")
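
Note for reviewers: a minimal sketch of how the standardized error surfaces to a caller. The validate_data_config helper below is hypothetical and exists only for illustration; the MissingTrainingConfigError class, its ValueError base, and its message format are the only pieces taken from this patch.

    from llama_stack.apis.common.errors import MissingTrainingConfigError

    def validate_data_config(data_config) -> None:
        # Hypothetical helper mirroring the checks standardized above:
        # None means the training config section is missing.
        if data_config is None:
            raise MissingTrainingConfigError("DataConfig")

    try:
        validate_data_config(None)
    except MissingTrainingConfigError as exc:
        # Because the new class subclasses ValueError, any existing
        # "except ValueError" handlers continue to catch it.
        # Prints: 'DataConfig' is required for training
        print(exc)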