Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-27 06:28:50 +00:00)
feat: enable DPO training with HuggingFace inline provider
parent 874b1cb00f · commit 1c7be17113
7 changed files with 813 additions and 101 deletions
@@ -67,6 +67,11 @@ class HuggingFacePostTrainingConfig(BaseModel):
     # Can improve data transfer speed to GPU but uses more memory
     dataloader_pin_memory: bool = True
 
+    # DPO-specific parameters
+    dpo_beta: float = 0.1
+    use_reference_model: bool = True
+    dpo_loss_type: Literal["sigmoid", "hinge", "ipo", "kto_pair"] = "sigmoid"
+
     @classmethod
     def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
         return {"checkpoint_format": "huggingface", "distributed_backend": None, "device": "cpu"}
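
The hunk above shows the new DPO fields in isolation. Below is a minimal sketch of how the extended config class might look and how its DPO fields could be fed to a preference-optimization trainer. The checkpoint_format, distributed_backend, and device fields are inferred from sample_run_config rather than copied from the provider's source, and the keyword names "beta", "loss_type", and "reference_free" follow TRL's DPOConfig naming as an assumption; this is an illustration, not the provider's actual implementation.

# Sketch only: anything outside the diff hunk is inferred or assumed, not
# taken verbatim from the llama-stack source.
from typing import Any, Literal

from pydantic import BaseModel


class HuggingFacePostTrainingConfig(BaseModel):
    # Assumed fields, inferred from the sample_run_config return value below.
    checkpoint_format: str | None = "huggingface"
    distributed_backend: str | None = None
    device: str = "cpu"

    # Can improve data transfer speed to GPU but uses more memory
    dataloader_pin_memory: bool = True

    # DPO-specific parameters (added by this commit)
    dpo_beta: float = 0.1
    use_reference_model: bool = True
    dpo_loss_type: Literal["sigmoid", "hinge", "ipo", "kto_pair"] = "sigmoid"

    @classmethod
    def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
        return {"checkpoint_format": "huggingface", "distributed_backend": None, "device": "cpu"}


def dpo_trainer_kwargs(cfg: HuggingFacePostTrainingConfig) -> dict[str, Any]:
    # Illustrative mapping: the keys follow TRL's DPOConfig naming
    # ("beta", "loss_type", "reference_free"); the real provider may wire
    # these fields differently.
    return {
        "beta": cfg.dpo_beta,                           # temperature controlling deviation from the reference policy
        "loss_type": cfg.dpo_loss_type,                 # sigmoid / hinge / ipo / kto_pair
        "reference_free": not cfg.use_reference_model,  # skip the frozen reference model entirely
    }


if __name__ == "__main__":
    cfg = HuggingFacePostTrainingConfig(dpo_beta=0.2, dpo_loss_type="ipo")
    print(cfg.sample_run_config(__distro_dir__="/tmp/distro"))  # hypothetical path
    print(dpo_trainer_kwargs(cfg))

Keeping the DPO knobs on the provider config (rather than per-request) mirrors how the existing fields such as dataloader_pin_memory are handled, so a distribution's run config can pin sensible defaults like dpo_beta = 0.1 and loss_type = "sigmoid".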