mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-22 20:40:00 +00:00
fix: post_training ci
test_preference_optimize was missing args for DPOAlignmentConfig. Add them in.
Signed-off-by: Charlie Doern <cdoern@redhat.com>
This commit is contained in:
parent
cf73146132
commit
5fc412695c
1 changed file with 5 additions and 2 deletions
|
|
@@ -194,9 +194,12 @@ class TestPostTraining:
|
||||||
# DPO algorithm configuration
|
# DPO algorithm configuration
|
||||||
algorithm_config = DPOAlignmentConfig(
|
algorithm_config = DPOAlignmentConfig(
|
||||||
beta=0.1,
|
beta=0.1,
|
||||||
loss_type=DPOLossType.sigmoid,
|
loss_type=DPOLossType.sigmoid, # Default loss type
|
||||||
|
reward_scale=1.0, # Scaling factor for reward signal (neutral scaling)
|
||||||
|
reward_clip=5.0, # Maximum absolute value for reward clipping (prevents extreme values)
|
||||||
|
epsilon=1e-8, # Small value for numerical stability
|
||||||
|
gamma=1.0,
|
||||||
)
|
)
|
||||||
|
|
||||||
data_config = DataConfig(
|
data_config = DataConfig(
|
||||||
dataset_id=dataset.identifier,
|
dataset_id=dataset.identifier,
|
||||||
batch_size=1,
|
batch_size=1,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue