From 4027029420b44b891af4602d1448ff22c058339c Mon Sep 17 00:00:00 2001
From: Ihar Hrachyshka
Date: Thu, 20 Feb 2025 17:54:30 -0500
Subject: [PATCH] fix: don't attempt to clean gpu memory up when device is cpu

This is a follow up to: https://github.com/meta-llama/llama-stack/pull/1140

Signed-off-by: Ihar Hrachyshka
---
 .../torchtune/recipes/lora_finetuning_single_device.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
index 67de380c0..bff55e017 100644
--- a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
+++ b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
@@ -547,10 +547,11 @@ class LoraFinetuningSingleDevice:
             checkpoints.append(checkpoint)
 
         # clean up the memory after training finishes
-        self._model.to("cpu")
+        if self._device.type != "cpu":
+            self._model.to("cpu")
+            torch.cuda.empty_cache()
         del self._model
         gc.collect()
-        torch.cuda.empty_cache()
 
         return (memory_stats, checkpoints)
 
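
For reference, a minimal standalone sketch of the device-aware cleanup pattern
this patch introduces (not part of the change; the class and method names below
are illustrative stand-ins for the recipe's own cleanup logic). GPU-specific
steps run only when training used an accelerator:

    import gc

    import torch


    class TrainerSketch:
        """Illustrative stand-in for the recipe's post-training cleanup."""

        def __init__(self, model: torch.nn.Module, device: torch.device) -> None:
            self._model = model
            self._device = device

        def cleanup_after_training(self) -> None:
            if self._device.type != "cpu":
                # Move weights off the accelerator and release cached CUDA
                # blocks; on a CPU-only run there is no GPU memory to reclaim,
                # so both steps are skipped.
                self._model.to("cpu")
                torch.cuda.empty_cache()
            # Drop the last strong reference so Python can free the weights.
            del self._model
            gc.collect()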