Remove unused DPO parameters from schema and tests

This commit is contained in:
Nehanth 2025-07-31 15:47:58 +00:00
parent 5c33bc1353
commit b7901156f5
4 changed files with 0 additions and 50 deletions

View file

@@ -11163,20 +11163,6 @@ components:
DPOAlignmentConfig:
type: object
properties:
reward_scale:
type: number
description: Scaling factor for the reward signal
reward_clip:
type: number
description: >-
Maximum absolute value for reward clipping
epsilon:
type: number
description: >-
Small value added for numerical stability
gamma:
type: number
description: Discount factor for future rewards
beta:
type: number
description: Temperature parameter for the DPO loss
@@ -11186,10 +11172,6 @@ components:
description: The type of loss function to use for DPO
additionalProperties: false
required:
- reward_scale
- reward_clip
- epsilon
- gamma
- beta
- loss_type
title: DPOAlignmentConfig