Remove unused DPO parameters from schema and tests

2025-12-23 06:12:27 +00:00 · 2025-07-31 15:47:58 +00:00 · 2025-07-31 15:47:58 +00:00 · b7901156f5
commit b7901156f5
parent 5c33bc1353
4 changed files with 0 additions and 50 deletions
--- a/llama_stack/apis/post_training/post_training.py
+++ b/llama_stack/apis/post_training/post_training.py
@@ -193,18 +193,10 @@ class DPOLossType(Enum):
 class DPOAlignmentConfig(BaseModel):
    """Configuration for Direct Preference Optimization (DPO) alignment.

-    :param reward_scale: Scaling factor for the reward signal
-    :param reward_clip: Maximum absolute value for reward clipping
-    :param epsilon: Small value added for numerical stability
-    :param gamma: Discount factor for future rewards
    :param beta: Temperature parameter for the DPO loss
    :param loss_type: The type of loss function to use for DPO
    """

-    reward_scale: float
-    reward_clip: float
-    epsilon: float
-    gamma: float
    beta: float
    loss_type: DPOLossType = DPOLossType.sigmoid