diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 29ba9dede..16d5dd41a 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -10111,20 +10111,20 @@ components:
     DPOAlignmentConfig:
       type: object
       properties:
-        reward_scale:
-          type: number
-        reward_clip:
-          type: number
-        epsilon:
-          type: number
-        gamma:
+        beta:
           type: number
+        loss_type:
+          type: string
+          enum:
+            - sigmoid
+            - hinge
+            - ipo
+            - kto_pair
+          default: sigmoid
       additionalProperties: false
       required:
-        - reward_scale
-        - reward_clip
-        - epsilon
-        - gamma
+        - beta
+        - loss_type
       title: DPOAlignmentConfig
     DataConfig:
       type: object
diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py
index b196c8a17..ce6448951 100644
--- a/llama_stack/apis/post_training/post_training.py
+++ b/llama_stack/apis/post_training/post_training.py
@@ -106,10 +106,11 @@ class RLHFAlgorithm(Enum):
 
 @json_schema_type
 class DPOAlignmentConfig(BaseModel):
-    reward_scale: float
-    reward_clip: float
-    epsilon: float
-    gamma: float
+    # Required; no default. Presumably the DPO beta coefficient -- confirm
+    # against the training provider that consumes this config.
+    beta: float
+    # Optional; restricted to the listed names, "sigmoid" when omitted.
+    loss_type: Literal["sigmoid", "hinge", "ipo", "kto_pair"] = "sigmoid"
 
 
 @json_schema_type