Fix DPO loss_type to use Enum instead of Literal for schema generation

- Replace Literal["sigmoid", "hinge", "ipo", "kto_pair"] with DPOLossType enum
- The schema generator only supports Literal types with a single value, so the four-value Literal is replaced by an Enum
This commit is contained in:
Ubuntu 2025-07-17 20:47:24 +00:00
parent 37875a1985
commit b4c13cc003

View file

@ -104,10 +104,18 @@ class RLHFAlgorithm(Enum):
dpo = "dpo" dpo = "dpo"
@json_schema_type
class DPOLossType(Enum):
    """Closed set of loss-type names that can be selected for DPO alignment.

    Each member's value is the same string as its name, so the textual
    form of every option is unchanged from the plain-string spelling.
    """

    # Declared as an Enum rather than a multi-value Literal because the
    # schema generator only supports Literal types with a single value.
    sigmoid = "sigmoid"
    hinge = "hinge"
    ipo = "ipo"
    kto_pair = "kto_pair"
@json_schema_type @json_schema_type
class DPOAlignmentConfig(BaseModel): class DPOAlignmentConfig(BaseModel):
beta: float beta: float
loss_type: Literal["sigmoid", "hinge", "ipo", "kto_pair"] = "sigmoid" loss_type: DPOLossType = DPOLossType.sigmoid
@json_schema_type @json_schema_type