Merge remote-tracking branch 'origin/main' into support_more_data_format

2025-12-26 19:18:03 +00:00 · 2025-01-06 14:19:10 -08:00 · 2025-01-06 14:19:10 -08:00 · 2a992d4f05
commit 2a992d4f05
parent bbe190a085 7a90fc5854
10 changed files with 76 additions and 55 deletions
--- a/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py
+++ b/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py
@@ -90,18 +90,24 @@ class TorchtuneCheckpointer:
        model_file_path.mkdir(parents=True, exist_ok=True)

        # copy the related files for inference
-        shutil.copy(
-            Path.joinpath(self._checkpoint_dir, "params.json"),
-            Path.joinpath(model_file_path, "params.json"),
-        )
-        shutil.copy(
-            Path.joinpath(self._checkpoint_dir, "tokenizer.model"),
-            Path.joinpath(model_file_path, "tokenizer.model"),
-        )
-        shutil.copy(
-            Path.joinpath(self._checkpoint_dir, "orig_params.json"),
-            Path.joinpath(model_file_path, "orig_params.json"),
-        )
+        source_path = Path.joinpath(self._checkpoint_dir, "params.json")
+        if source_path.exists():
+            shutil.copy(
+                source_path,
+                Path.joinpath(model_file_path, "params.json"),
+            )
+        source_path = Path.joinpath(self._checkpoint_dir, "tokenizer.model")
+        if source_path.exists():
+            shutil.copy(
+                source_path,
+                Path.joinpath(model_file_path, "tokenizer.model"),
+            )
+        source_path = Path.joinpath(self._checkpoint_dir, "orig_params.json")
+        if source_path.exists():
+            shutil.copy(
+                source_path,
+                Path.joinpath(model_file_path, "orig_params.json"),
+            )

        if not adapter_only:
            model_state_dict = state_dict[training.MODEL_KEY]
--- a/llama_stack/providers/inline/post_training/torchtune/common/utils.py
+++ b/llama_stack/providers/inline/post_training/torchtune/common/utils.py
@@ -29,8 +29,9 @@ from torchtune.data._messages import (
    ShareGPTToMessages,
 )

-from torchtune.models.llama3 import llama3_tokenizer, lora_llama3_8b
+from torchtune.models.llama3 import llama3_tokenizer
 from torchtune.models.llama3._tokenizer import Llama3Tokenizer
+from torchtune.models.llama3_1 import lora_llama3_1_8b
 from torchtune.models.llama3_2 import lora_llama3_2_3b
 from torchtune.modules.transforms import Transform

@@ -63,8 +64,8 @@ MODEL_CONFIGS: Dict[str, ModelConfig] = {
        tokenizer_type=llama3_tokenizer,
        checkpoint_type="LLAMA3_2",
    ),
-    "Llama-3-8B-Instruct": ModelConfig(
-        model_definition=lora_llama3_8b,
+    "Llama3.1-8B-Instruct": ModelConfig(
+        model_definition=lora_llama3_1_8b,
        tokenizer_type=llama3_tokenizer,
        checkpoint_type="LLAMA3",
    ),