Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-07-27 06:28:50 +00:00
Merge remote-tracking branch 'upstream/main' into update-api-docs
commit 48c5d089c6
445 changed files with 15118 additions and 17426 deletions
@@ -181,6 +181,14 @@ class RLHFAlgorithm(Enum):
     dpo = "dpo"
 
 
+@json_schema_type
+class DPOLossType(Enum):
+    sigmoid = "sigmoid"
+    hinge = "hinge"
+    ipo = "ipo"
+    kto_pair = "kto_pair"
+
+
 @json_schema_type
 class DPOAlignmentConfig(BaseModel):
     """Configuration for Direct Preference Optimization (DPO) alignment.
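The default sigmoid value corresponds to the original DPO objective, -log sigmoid(beta * (logratio_chosen - logratio_rejected)). A minimal sketch of that computation for a single preference pair, assuming scalar per-response log-ratios (the function name and interface are illustrative, not part of the llama-stack API):

import math

def dpo_sigmoid_loss(chosen_logratio: float, rejected_logratio: float, beta: float) -> float:
    # chosen_logratio / rejected_logratio are log(pi_theta / pi_ref) for the
    # preferred and dispreferred responses; beta is the temperature field
    # defined on DPOAlignmentConfig below.
    margin = beta * (chosen_logratio - rejected_logratio)
    # -log(sigmoid(margin)) rewritten as softplus(-margin) for numerical stability
    return math.log1p(math.exp(-margin))

The hinge, ipo, and kto_pair variants replace this log-sigmoid term with a hinge loss, the IPO squared-error objective, and the paired KTO formulation, respectively.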
@@ -189,12 +197,16 @@ class DPOAlignmentConfig(BaseModel):
     :param reward_clip: Maximum absolute value for reward clipping
     :param epsilon: Small value added for numerical stability
     :param gamma: Discount factor for future rewards
+    :param beta: Temperature parameter for the DPO loss
+    :param loss_type: The type of loss function to use for DPO
     """
 
     reward_scale: float
     reward_clip: float
     epsilon: float
     gamma: float
+    beta: float
+    loss_type: DPOLossType = DPOLossType.sigmoid
 
 
 @json_schema_type
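Putting the new fields together, a hypothetical instantiation of the updated config (the import path is assumed, and the values are illustrative rather than recommended defaults):

from llama_stack.apis.post_training import DPOAlignmentConfig, DPOLossType  # assumed import path

config = DPOAlignmentConfig(
    reward_scale=1.0,
    reward_clip=5.0,
    epsilon=1e-8,
    gamma=0.99,
    beta=0.1,  # temperature for the DPO loss
    loss_type=DPOLossType.ipo,  # any DPOLossType; sigmoid is the default
)

Because loss_type defaults to DPOLossType.sigmoid, existing callers that omit it keep the original behavior.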