diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 29ba9dede..16d5dd41a 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -10111,20 +10111,20 @@ components: DPOAlignmentConfig: type: object properties: - reward_scale: - type: number - reward_clip: - type: number - epsilon: - type: number - gamma: + beta: type: number + loss_type: + type: string + enum: + - sigmoid + - hinge + - ipo + - kto_pair + default: sigmoid additionalProperties: false required: - - reward_scale - - reward_clip - - epsilon - - gamma + - beta + - loss_type title: DPOAlignmentConfig DataConfig: type: object diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py index b196c8a17..ce6448951 100644 --- a/llama_stack/apis/post_training/post_training.py +++ b/llama_stack/apis/post_training/post_training.py @@ -106,10 +106,8 @@ class RLHFAlgorithm(Enum): @json_schema_type class DPOAlignmentConfig(BaseModel): - reward_scale: float - reward_clip: float - epsilon: float - gamma: float + beta: float + loss_type: Literal["sigmoid", "hinge", "ipo", "kto_pair"] = "sigmoid" @json_schema_type