From c36628fefde3f4f3e1f354e5dea1f46a07888c14 Mon Sep 17 00:00:00 2001
From: Nathan Weinberg
Date: Thu, 31 Jul 2025 11:45:29 -0400
Subject: [PATCH] chore: standardize missing training config error

Signed-off-by: Nathan Weinberg
---
 llama_stack/apis/common/errors.py          |  8 ++++++++
 .../recipes/finetune_single_device.py      | 17 +++++++++--------
 .../recipes/finetune_single_device_dpo.py  | 17 +++++++++--------
 3 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/llama_stack/apis/common/errors.py b/llama_stack/apis/common/errors.py
index 6e0fa0b3c..326c48046 100644
--- a/llama_stack/apis/common/errors.py
+++ b/llama_stack/apis/common/errors.py
@@ -72,3 +72,11 @@ class ModelTypeError(TypeError):
             f"Model '{model_name}' is of type '{model_type}' rather than the expected type '{expected_model_type}'"
         )
         super().__init__(message)
+
+
+class MissingTrainingConfigError(ValueError):
+    """raised when Llama Stack is missing a configuration required for training"""
+
+    def __init__(self, config_name: str) -> None:
+        message = f"'{config_name}' is required for training"
+        super().__init__(message)
diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
index 2574b995b..2853e0234 100644
--- a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
+++ b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
@@ -20,6 +20,7 @@ from transformers import (
 )
 from trl import SFTConfig, SFTTrainer
 
+from llama_stack.apis.common.errors import MissingTrainingConfigError
 from llama_stack.apis.datasetio import DatasetIO
 from llama_stack.apis.datasets import Datasets
 from llama_stack.apis.post_training import (
@@ -224,8 +225,8 @@ class HFFinetuningSingleDevice:
             tuple: (train_dataset, eval_dataset, tokenizer)
         """
         # Validate data config
-        if not config.data_config:
-            raise ValueError("DataConfig is required for training")
+        if config.data_config is None:
+            raise MissingTrainingConfigError("DataConfig")
 
         # Load dataset
         logger.info(f"Loading dataset: {config.data_config.dataset_id}")
@@ -300,8 +301,8 @@ class HFFinetuningSingleDevice:
             logger.info(f"Using custom learning rate: {lr}")
 
         # Validate data config
-        if not config.data_config:
-            raise ValueError("DataConfig is required for training")
+        if config.data_config is None:
+            raise MissingTrainingConfigError("DataConfig")
         data_config = config.data_config
 
         # Calculate steps and get save strategy
@@ -392,8 +393,8 @@ class HFFinetuningSingleDevice:
         train_dataset, eval_dataset, tokenizer = await self.load_dataset(model, config_obj, provider_config_obj)
 
         # Calculate steps per epoch
-        if not config_obj.data_config:
-            raise ValueError("DataConfig is required for training")
+        if config_obj.data_config is None:
+            raise MissingTrainingConfigError("DataConfig")
         steps_per_epoch = len(train_dataset) // config_obj.data_config.batch_size
 
         # Setup training arguments
@@ -475,8 +476,8 @@ class HFFinetuningSingleDevice:
         )
 
         # Validate data config
-        if not config.data_config:
-            raise ValueError("DataConfig is required for training")
+        if config.data_config is None:
+            raise MissingTrainingConfigError("DataConfig")
 
         # Train in a separate process
         logger.info("Starting training in separate process")
diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
index a7c19faac..3c0b77eed 100644
--- a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
+++ b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
@@ -17,6 +17,7 @@ from transformers import (
 )
 from trl import DPOConfig, DPOTrainer
 
+from llama_stack.apis.common.errors import MissingTrainingConfigError
 from llama_stack.apis.datasetio import DatasetIO
 from llama_stack.apis.datasets import Datasets
 from llama_stack.apis.post_training import (
@@ -204,8 +205,8 @@ class HFDPOAlignmentSingleDevice:
     ) -> tuple[Dataset, Dataset, AutoTokenizer]:
         """Load and prepare the dataset for DPO training."""
         # Validate data config
-        if not config.data_config:
-            raise ValueError("DataConfig is required for DPO training")
+        if config.data_config is None:
+            raise MissingTrainingConfigError("DataConfig")
 
         # Load dataset
         logger.info(f"Loading dataset: {config.data_config.dataset_id}")
@@ -266,8 +267,8 @@ class HFDPOAlignmentSingleDevice:
             logger.info(f"Using custom learning rate: {lr}")
 
         # Validate data config
-        if not config.data_config:
-            raise ValueError("DataConfig is required for training")
+        if config.data_config is None:
+            raise MissingTrainingConfigError("DataConfig")
         data_config = config.data_config
 
         # Calculate steps and get save strategy
@@ -356,8 +357,8 @@ class HFDPOAlignmentSingleDevice:
         train_dataset, eval_dataset, tokenizer = await self.load_dataset(model, config_obj, provider_config_obj)
 
         # Calculate steps per epoch
-        if not config_obj.data_config:
-            raise ValueError("DataConfig is required for training")
+        if config_obj.data_config is None:
+            raise MissingTrainingConfigError("DataConfig")
         steps_per_epoch = len(train_dataset) // config_obj.data_config.batch_size
 
         # Setup training arguments
@@ -441,8 +442,8 @@ class HFDPOAlignmentSingleDevice:
         }
 
         # Validate data config
-        if not config.data_config:
-            raise ValueError("DataConfig is required for training")
+        if config.data_config is None:
+            raise MissingTrainingConfigError("DataConfig")
 
         # Train in a separate process
         logger.info("Starting DPO training in separate process")
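
A minimal sketch of how the standardized error reads at a call site, assuming only the MissingTrainingConfigError class and the data_config check introduced by this patch; the require_data_config helper is hypothetical and exists purely for illustration:

    from llama_stack.apis.common.errors import MissingTrainingConfigError


    def require_data_config(config):
        """Hypothetical helper mirroring the check the recipes above now perform."""
        if config.data_config is None:
            # Callers see the standardized message: "'DataConfig' is required for training"
            raise MissingTrainingConfigError("DataConfig")
        return config.data_config

Because MissingTrainingConfigError subclasses ValueError, existing callers that catch ValueError continue to work unchanged.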