From 4027029420b44b891af4602d1448ff22c058339c Mon Sep 17 00:00:00 2001
From: Ihar Hrachyshka
Date: Thu, 20 Feb 2025 17:54:30 -0500
Subject: [PATCH] fix: don't attempt to clean gpu memory up when device is cpu

This is a follow up to: https://github.com/meta-llama/llama-stack/pull/1140

Signed-off-by: Ihar Hrachyshka
---
 .../torchtune/recipes/lora_finetuning_single_device.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
index 67de380c0..bff55e017 100644
--- a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
+++ b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
@@ -547,10 +547,11 @@ class LoraFinetuningSingleDevice:
             checkpoints.append(checkpoint)
 
         # clean up the memory after training finishes
-        self._model.to("cpu")
+        if self._device.type != "cpu":
+            self._model.to("cpu")
+            torch.cuda.empty_cache()
         del self._model
         gc.collect()
-        torch.cuda.empty_cache()
 
         return (memory_stats, checkpoints)
 
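
For reference, a minimal standalone sketch of the device-aware cleanup pattern
this patch introduces (not part of the change; the class and method names below
are illustrative stand-ins for the recipe's own cleanup logic). GPU-specific
steps run only when training used an accelerator:

    import gc

    import torch


    class TrainerSketch:
        """Illustrative stand-in for the recipe's post-training cleanup."""

        def __init__(self, model: torch.nn.Module, device: torch.device) -> None:
            self._model = model
            self._device = device

        def cleanup_after_training(self) -> None:
            if self._device.type != "cpu":
                # Move weights off the accelerator and release cached CUDA
                # blocks; on a CPU-only run there is no GPU memory to reclaim,
                # so both steps are skipped.
                self._model.to("cpu")
                torch.cuda.empty_cache()
            # Drop the last strong reference so Python can free the weights.
            del self._model
            gc.collect()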