mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-23 00:52:26 +00:00
Fix DPOAlignmentConfig schema to use correct DPO parameters
- Replace incorrect PPO-like parameters (reward_scale, reward_clip, epsilon, gamma) - Add proper DPO parameters: beta (KL coefficient) and loss_type - Update spec to reflect the correct schema
This commit is contained in:
parent
477bcd4d09
commit
37875a1985
2 changed files with 13 additions and 15 deletions
|
|
@ -106,10 +106,8 @@ class RLHFAlgorithm(Enum):
|
|||
|
||||
@json_schema_type
class DPOAlignmentConfig(BaseModel):
    """Configuration for Direct Preference Optimization (DPO) alignment.

    DPO optimizes a policy directly from preference pairs and has no reward
    model, so PPO-style parameters (reward_scale, reward_clip, epsilon,
    gamma) do not apply and are intentionally absent from this schema.

    :param beta: KL-regularization coefficient controlling how far the
        policy may drift from the reference model (higher = stay closer).
    :param loss_type: Variant of the DPO objective to optimize. One of
        "sigmoid" (standard DPO), "hinge", "ipo", or "kto_pair".
        Defaults to "sigmoid".
    """

    beta: float
    loss_type: Literal["sigmoid", "hinge", "ipo", "kto_pair"] = "sigmoid"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue