From 5c33bc1353373e7cf02cc4f45c810ee108104d8a Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Thu, 31 Jul 2025 11:26:06 -0400 Subject: [PATCH] fix: post_training ci (#2984) --- tests/integration/post_training/test_post_training.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/integration/post_training/test_post_training.py b/tests/integration/post_training/test_post_training.py index 05f8717d6..839b9b1f2 100644 --- a/tests/integration/post_training/test_post_training.py +++ b/tests/integration/post_training/test_post_training.py @@ -194,9 +194,12 @@ class TestPostTraining: # DPO algorithm configuration algorithm_config = DPOAlignmentConfig( beta=0.1, - loss_type=DPOLossType.sigmoid, + loss_type=DPOLossType.sigmoid, # Default loss type + reward_scale=1.0, # Scaling factor for reward signal (neutral scaling) + reward_clip=5.0, # Maximum absolute value for reward clipping (prevents extreme values) + epsilon=1e-8, # Small value for numerical stability + gamma=1.0, ) - data_config = DataConfig( dataset_id=dataset.identifier, batch_size=1,