diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 6a8945bd1..f9af10165 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -15078,22 +15078,6 @@
             "DPOAlignmentConfig": {
                 "type": "object",
                 "properties": {
-                    "reward_scale": {
-                        "type": "number",
-                        "description": "Scaling factor for the reward signal"
-                    },
-                    "reward_clip": {
-                        "type": "number",
-                        "description": "Maximum absolute value for reward clipping"
-                    },
-                    "epsilon": {
-                        "type": "number",
-                        "description": "Small value added for numerical stability"
-                    },
-                    "gamma": {
-                        "type": "number",
-                        "description": "Discount factor for future rewards"
-                    },
                     "beta": {
                         "type": "number",
                         "description": "Temperature parameter for the DPO loss"
@@ -15106,10 +15090,6 @@
                 },
                 "additionalProperties": false,
                 "required": [
-                    "reward_scale",
-                    "reward_clip",
-                    "epsilon",
-                    "gamma",
                     "beta",
                     "loss_type"
                 ],
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index f1bb40dc1..d2c41b2bf 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -11163,20 +11163,6 @@ components:
     DPOAlignmentConfig:
       type: object
       properties:
-        reward_scale:
-          type: number
-          description: Scaling factor for the reward signal
-        reward_clip:
-          type: number
-          description: >-
-            Maximum absolute value for reward clipping
-        epsilon:
-          type: number
-          description: >-
-            Small value added for numerical stability
-        gamma:
-          type: number
-          description: Discount factor for future rewards
         beta:
           type: number
           description: Temperature parameter for the DPO loss
@@ -11186,10 +11172,6 @@
           description: The type of loss function to use for DPO
       additionalProperties: false
       required:
-        - reward_scale
-        - reward_clip
-        - epsilon
-        - gamma
        - beta
        - loss_type
      title: DPOAlignmentConfig
diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py
index 9170cba51..c16221289 100644
--- a/llama_stack/apis/post_training/post_training.py
+++ b/llama_stack/apis/post_training/post_training.py
@@ -193,18 +193,10 @@ class DPOLossType(Enum):
 class DPOAlignmentConfig(BaseModel):
     """Configuration for Direct Preference Optimization (DPO) alignment.
 
-    :param reward_scale: Scaling factor for the reward signal
-    :param reward_clip: Maximum absolute value for reward clipping
-    :param epsilon: Small value added for numerical stability
-    :param gamma: Discount factor for future rewards
     :param beta: Temperature parameter for the DPO loss
     :param loss_type: The type of loss function to use for DPO
     """
 
-    reward_scale: float
-    reward_clip: float
-    epsilon: float
-    gamma: float
     beta: float
     loss_type: DPOLossType = DPOLossType.sigmoid
 
diff --git a/tests/integration/post_training/test_post_training.py b/tests/integration/post_training/test_post_training.py
index 839b9b1f2..002da1160 100644
--- a/tests/integration/post_training/test_post_training.py
+++ b/tests/integration/post_training/test_post_training.py
@@ -195,10 +195,6 @@ class TestPostTraining:
         algorithm_config = DPOAlignmentConfig(
             beta=0.1,
             loss_type=DPOLossType.sigmoid,  # Default loss type
-            reward_scale=1.0,  # Scaling factor for reward signal (neutral scaling)
-            reward_clip=5.0,  # Maximum absolute value for reward clipping (prevents extreme values)
-            epsilon=1e-8,  # Small value for numerical stability
-            gamma=1.0,
         )
         data_config = DataConfig(
             dataset_id=dataset.identifier,
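
For reference, a minimal usage sketch of `DPOAlignmentConfig` after this change, assuming the public import path `llama_stack.apis.post_training` used elsewhere in the repo; only `beta` is required now, and `loss_type` defaults to `DPOLossType.sigmoid`:

```python
# Minimal sketch (import path assumed from llama_stack/apis/post_training/post_training.py).
from llama_stack.apis.post_training import DPOAlignmentConfig, DPOLossType

# After this change the config carries only the parameters DPO actually uses:
# beta (temperature of the DPO loss) and loss_type (defaults to sigmoid).
algorithm_config = DPOAlignmentConfig(
    beta=0.1,
    loss_type=DPOLossType.sigmoid,
)
```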