mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-22 20:40:00 +00:00
Fix DPOAlignmentConfig schema to use correct DPO parameters
- Replace incorrect PPO-like parameters (reward_scale, reward_clip, epsilon, gamma)
- Add proper DPO parameters: beta (KL coefficient) and loss_type
- Update spec to reflect the correct schema
This commit is contained in:
parent
477bcd4d09
commit
37875a1985
2 changed files with 13 additions and 15 deletions
22
docs/_static/llama-stack-spec.yaml
vendored
22
docs/_static/llama-stack-spec.yaml
vendored
|
|
@@ -10111,20 +10111,20 @@ components:
|
|||
DPOAlignmentConfig:
|
||||
type: object
|
||||
properties:
|
||||
reward_scale:
|
||||
type: number
|
||||
reward_clip:
|
||||
type: number
|
||||
epsilon:
|
||||
type: number
|
||||
gamma:
|
||||
beta:
|
||||
type: number
|
||||
loss_type:
|
||||
type: string
|
||||
enum:
|
||||
- sigmoid
|
||||
- hinge
|
||||
- ipo
|
||||
- kto_pair
|
||||
default: sigmoid
|
||||
additionalProperties: false
|
||||
required:
|
||||
- reward_scale
|
||||
- reward_clip
|
||||
- epsilon
|
||||
- gamma
|
||||
- beta
|
||||
- loss_type
|
||||
title: DPOAlignmentConfig
|
||||
DataConfig:
|
||||
type: object
|
||||
|
|
|
|||
|
|
@@ -106,10 +106,8 @@ class RLHFAlgorithm(Enum):
|
|||
|
||||
@json_schema_type
|
||||
class DPOAlignmentConfig(BaseModel):
|
||||
reward_scale: float
|
||||
reward_clip: float
|
||||
epsilon: float
|
||||
gamma: float
|
||||
beta: float
|
||||
loss_type: Literal["sigmoid", "hinge", "ipo", "kto_pair"] = "sigmoid"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue