mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-23 08:02:27 +00:00
Remove unused DPO parameters from schema and tests
This commit is contained in:
parent
5c33bc1353
commit
b7901156f5
4 changed files with 0 additions and 50 deletions
|
|
@@ -193,18 +193,10 @@ class DPOLossType(Enum):
|
|||
class DPOAlignmentConfig(BaseModel):
    """Configuration for Direct Preference Optimization (DPO) alignment.

    :param reward_scale: Scaling factor for the reward signal
    :param reward_clip: Maximum absolute value for reward clipping
    :param epsilon: Small value added for numerical stability
    :param gamma: Discount factor for future rewards
    :param beta: Temperature parameter for the DPO loss
    :param loss_type: The type of loss function to use for DPO
    """

    # NOTE(review): the surrounding commit ("Remove unused DPO parameters")
    # appears to delete reward_scale/reward_clip/epsilon/gamma; all visible
    # fields are reconstructed here as shown — confirm against the repo.
    reward_scale: float
    reward_clip: float
    epsilon: float
    gamma: float
    beta: float
    # Defaults to the sigmoid DPO loss variant (see DPOLossType enum above).
    loss_type: DPOLossType = DPOLossType.sigmoid
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue