fix: don't attempt to clean up GPU memory when the device is cpu

This is a follow-up to:
https://github.com/meta-llama/llama-stack/pull/1140

Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
Author: Ihar Hrachyshka
Date: 2025-02-20 17:54:30 -05:00
Parent: 736560ceba
Commit: 4027029420


@@ -547,10 +547,11 @@ class LoraFinetuningSingleDevice:
             checkpoints.append(checkpoint)
         # clean up the memory after training finishes
-        self._model.to("cpu")
+        if self._device.type != "cpu":
+            self._model.to("cpu")
+            torch.cuda.empty_cache()
         del self._model
         gc.collect()
-        torch.cuda.empty_cache()
         return (memory_stats, checkpoints)
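
For illustration, a minimal standalone sketch of the same cleanup pattern is shown below. The TinyTrainer class and its fields are hypothetical stand-ins for the recipe class, not code from llama-stack; only the guard on the device type mirrors the change above.

    import gc

    import torch
    from torch import nn


    class TinyTrainer:
        # Hypothetical stand-in for LoraFinetuningSingleDevice, for illustration only.
        def __init__(self, device: torch.device) -> None:
            self._device = device
            self._model = nn.Linear(8, 8).to(device)

        def finish(self) -> None:
            # Offloading the model to CPU and emptying the CUDA cache only makes
            # sense when training actually ran on an accelerator; on a cpu device
            # both calls are unnecessary, so the cleanup is guarded by device type.
            if self._device.type != "cpu":
                self._model.to("cpu")
                torch.cuda.empty_cache()
            del self._model
            gc.collect()


    if __name__ == "__main__":
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        TinyTrainer(device).finish()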