Merge remote-tracking branch 'upstream/main' into update-api-docs

This commit is contained in:
Sai Soundararaj 2025-07-24 09:53:57 -07:00
commit 48c5d089c6
445 changed files with 15118 additions and 17426 deletions

View file

@ -181,6 +181,14 @@ class RLHFAlgorithm(Enum):
dpo = "dpo"
# Closed set of loss-function choices for DPO alignment. Each member's value is
# the same string as its name. DPOAlignmentConfig.loss_type is typed with this
# enum and defaults to DPOLossType.sigmoid.
# NOTE(review): the member names presumably mirror the loss names accepted by the
# underlying DPO trainer implementation — confirm against the provider before
# adding or renaming members.
@json_schema_type
class DPOLossType(Enum):
sigmoid = "sigmoid"
hinge = "hinge"
ipo = "ipo"
kto_pair = "kto_pair"
@json_schema_type
class DPOAlignmentConfig(BaseModel):
"""Configuration for Direct Preference Optimization (DPO) alignment.
@ -189,12 +197,16 @@ class DPOAlignmentConfig(BaseModel):
:param reward_clip: Maximum absolute value for reward clipping
:param epsilon: Small value added for numerical stability
:param gamma: Discount factor for future rewards
:param beta: Temperature parameter for the DPO loss
:param loss_type: The type of loss function to use for DPO
"""
reward_scale: float
reward_clip: float
epsilon: float
gamma: float
beta: float
loss_type: DPOLossType = DPOLossType.sigmoid
@json_schema_type