Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-07-27 06:28:50 +00:00
Merge remote-tracking branch 'upstream/main' into update-api-docs
commit 48c5d089c6
445 changed files with 15118 additions and 17426 deletions
@@ -181,6 +181,14 @@ class RLHFAlgorithm(Enum):
     dpo = "dpo"
 
 
+@json_schema_type
+class DPOLossType(Enum):
+    sigmoid = "sigmoid"
+    hinge = "hinge"
+    ipo = "ipo"
+    kto_pair = "kto_pair"
+
+
 @json_schema_type
 class DPOAlignmentConfig(BaseModel):
     """Configuration for Direct Preference Optimization (DPO) alignment.
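The default sigmoid value corresponds to the original DPO objective, -log sigmoid(beta * (logratio_chosen - logratio_rejected)). A minimal sketch of that computation for a single preference pair, assuming scalar per-response log-ratios (the function name and interface are illustrative, not part of the llama-stack API):

import math

def dpo_sigmoid_loss(chosen_logratio: float, rejected_logratio: float, beta: float) -> float:
    # chosen_logratio / rejected_logratio are log(pi_theta / pi_ref) for the
    # preferred and dispreferred responses; beta is the temperature field
    # defined on DPOAlignmentConfig below.
    margin = beta * (chosen_logratio - rejected_logratio)
    # -log(sigmoid(margin)) rewritten as softplus(-margin) for numerical stability
    return math.log1p(math.exp(-margin))

The hinge, ipo, and kto_pair variants replace this log-sigmoid term with a hinge loss, the IPO squared-error objective, and the paired KTO formulation, respectively.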
@@ -189,12 +197,16 @@ class DPOAlignmentConfig(BaseModel):
     :param reward_clip: Maximum absolute value for reward clipping
     :param epsilon: Small value added for numerical stability
     :param gamma: Discount factor for future rewards
+    :param beta: Temperature parameter for the DPO loss
+    :param loss_type: The type of loss function to use for DPO
     """
 
     reward_scale: float
     reward_clip: float
     epsilon: float
     gamma: float
+    beta: float
+    loss_type: DPOLossType = DPOLossType.sigmoid
 
 
 @json_schema_type
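Putting the new fields together, a hypothetical instantiation of the updated config (the import path is assumed, and the values are illustrative rather than recommended defaults):

from llama_stack.apis.post_training import DPOAlignmentConfig, DPOLossType  # assumed import path

config = DPOAlignmentConfig(
    reward_scale=1.0,
    reward_clip=5.0,
    epsilon=1e-8,
    gamma=0.99,
    beta=0.1,  # temperature for the DPO loss
    loss_type=DPOLossType.ipo,  # any DPOLossType; sigmoid is the default
)

Because loss_type defaults to DPOLossType.sigmoid, existing callers that omit it keep the original behavior.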