Remove unused DPO parameters from schema and tests

This commit is contained in:
Nehanth 2025-07-31 15:47:58 +00:00
parent 5c33bc1353
commit b7901156f5
4 changed files with 0 additions and 50 deletions

View file

@@ -15078,22 +15078,6 @@
"DPOAlignmentConfig": {
"type": "object",
"properties": {
"reward_scale": {
"type": "number",
"description": "Scaling factor for the reward signal"
},
"reward_clip": {
"type": "number",
"description": "Maximum absolute value for reward clipping"
},
"epsilon": {
"type": "number",
"description": "Small value added for numerical stability"
},
"gamma": {
"type": "number",
"description": "Discount factor for future rewards"
},
"beta": {
"type": "number",
"description": "Temperature parameter for the DPO loss"
@@ -15106,10 +15090,6 @@
},
"additionalProperties": false,
"required": [
"reward_scale",
"reward_clip",
"epsilon",
"gamma",
"beta",
"loss_type"
],