From 37875a1985c99f688afca1824c8b1590413c9b3a Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-43-83.ec2.internal>
Date: Thu, 17 Jul 2025 19:55:44 +0000
Subject: [PATCH] Fix DPOAlignmentConfig schema to use correct DPO parameters

- Remove the incorrect PPO-like parameters (reward_scale, reward_clip, epsilon, gamma)
- Add the proper DPO parameters: beta (KL coefficient) and loss_type
  (one of sigmoid, hinge, ipo, kto_pair; defaults to sigmoid)
- Update docs/_static/llama-stack-spec.yaml to reflect the corrected schema
---
 docs/_static/llama-stack-spec.yaml            | 22 +++++++++----------
 .../apis/post_training/post_training.py       |  6 ++---
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 29ba9dede..16d5dd41a 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -10111,20 +10111,20 @@ components:
     DPOAlignmentConfig:
       type: object
       properties:
-        reward_scale:
-          type: number
-        reward_clip:
-          type: number
-        epsilon:
-          type: number
-        gamma:
+        beta:
           type: number
+        loss_type:
+          type: string
+          enum:
+            - sigmoid
+            - hinge
+            - ipo
+            - kto_pair
+          default: sigmoid
       additionalProperties: false
       required:
-        - reward_scale
-        - reward_clip
-        - epsilon
-        - gamma
+        - beta
+        - loss_type
       title: DPOAlignmentConfig
     DataConfig:
       type: object
diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py
index b196c8a17..ce6448951 100644
--- a/llama_stack/apis/post_training/post_training.py
+++ b/llama_stack/apis/post_training/post_training.py
@@ -106,10 +106,8 @@ class RLHFAlgorithm(Enum):
 
 @json_schema_type
 class DPOAlignmentConfig(BaseModel):
-    reward_scale: float
-    reward_clip: float
-    epsilon: float
-    gamma: float
+    beta: float
+    loss_type: Literal["sigmoid", "hinge", "ipo", "kto_pair"] = "sigmoid"
 
 
 @json_schema_type