feat: enable DPO training with HuggingFace inline provider

Ubuntu 2025-07-23 15:39:36 +00:00
parent 874b1cb00f
commit 1c7be17113
7 changed files with 813 additions and 101 deletions

@@ -67,6 +67,11 @@ class HuggingFacePostTrainingConfig(BaseModel):
    # Can improve data transfer speed to GPU but uses more memory
    dataloader_pin_memory: bool = True
    # DPO-specific parameters
    dpo_beta: float = 0.1
    use_reference_model: bool = True
    dpo_loss_type: Literal["sigmoid", "hinge", "ipo", "kto_pair"] = "sigmoid"

    @classmethod
    def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
        return {"checkpoint_format": "huggingface", "distributed_backend": None, "device": "cpu"}