diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py index 7543b1f4e..0e0149ad5 100644 --- a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +++ b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py @@ -4,6 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import gc import logging import os import time @@ -580,6 +581,15 @@ class LoraFinetuningSingleDevice: checkpoint.training_metrics = training_metrics checkpoints.append(checkpoint) + # clean up the memory after training finishes + self._model.to("cpu") + del self._model + gc.collect() + torch.cuda.empty_cache() + + print("Allocated:", torch.cuda.memory_allocated() / 1e6, "MB") + print("Reserved: ", torch.cuda.memory_reserved() / 1e6, "MB") + return (memory_stats, checkpoints) async def validation(self) -> Tuple[float, float]: