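added DPO: new preference-optimization (RLHF/DPO) request, config and endpoint; renamed the Finetuning API to PostTraining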

2025-12-05 10:23:44 +00:00 · 2024-07-11 00:01:58 -07:00 · 2024-07-11 00:01:58 -07:00 · 631328f556
commit 631328f556
parent 7cade3acc3
4 changed files with 796 additions and 472 deletions
--- a/source/api_definitions.py
+++ b/source/api_definitions.py
@ -12,19 +12,6 @@ from agentic_system_types import (
    SafetyViolation,
 )

-from finetuning_types import (
-    Checkpoint,
-    Dataset,
-    DoraFinetuningConfig,
-    FinetuningAlgorithm,
-    FinetuningJobLogStream,
-    FinetuningJobStatus,
-    LoraFinetuningConfig,
-    OptimizerConfig,
-    QLoraFinetuningConfig,
-    TrainingConfig,
-)
-
 from model_types import (
    BuiltinTool,
    Content,
@ -42,6 +29,21 @@ from model_types import (
    URL,
 )

+from post_training_types import (
+    Checkpoint,
+    Dataset,
+    DoraFinetuningConfig,
+    DPOAlignmentConfig,
+    FinetuningAlgorithm,
+    LoraFinetuningConfig,
+    OptimizerConfig,
+    PostTrainingJobLogStream,
+    PostTrainingJobStatus,
+    QLoraFinetuningConfig,
+    RLHFAlgorithm,
+    TrainingConfig,
+)
+
 from pyopenapi import Info, Options, Server, Specification, webmethod
 from strong_typing.schema import json_schema_type

@ -408,7 +410,7 @@ class Datasets(Protocol):

@json_schema_type
@dataclass
-class FinetuningTrainRequest:
+class PostTrainingSFTRequest:
    """Request to finetune a model."""

    job_uuid: str
@ -432,11 +434,34 @@ class FinetuningTrainRequest:

@json_schema_type
@dataclass
-class FinetuningJobStatusResponse:
+class PostTrainingRLHFRequest:
+    """Request to finetune a model."""
+
+    job_uuid: str
+
+    finetuned_model: URL
+
+    dataset: Dataset
+    validation_dataset: Dataset
+
+    algorithm: RLHFAlgorithm
+    algorithm_config: Union[DPOAlignmentConfig]
+
+    optimizer_config: OptimizerConfig
+    training_config: TrainingConfig
+
+    # TODO: define these
+    hyperparam_search_config: Dict[str, Any]
+    logger_config: Dict[str, Any]
+
+
+@json_schema_type
+@dataclass
+class PostTrainingJobStatusResponse:
    """Status of a finetuning job."""

    job_uuid: str
-    status: FinetuningJobStatus
+    status: PostTrainingJobStatus

    scheduled_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
@ -449,7 +474,7 @@ class FinetuningJobStatusResponse:

@json_schema_type
@dataclass
-class FinetuningJobArtifactsResponse:
+class PostTrainingJobArtifactsResponse:
    """Artifacts of a finetuning job."""

    job_uuid: str
@ -458,27 +483,35 @@ class FinetuningJobArtifactsResponse:
    # TODO(ashwin): metrics, evals


-class Finetuning(Protocol):
-    @webmethod(route="/finetuning/text_generation/train")
-    def post_train(
+class PostTraining(Protocol):
+    @webmethod(route="/post_training/supervised_fine_tune/")
+    def post_supervised_fine_tune(
        self,
-        request: FinetuningTrainRequest,
+        request: PostTrainingSFTRequest,
+    ) -> None: ...
+
+    @webmethod(route="/post_training/preference_optimize/")
+    def post_preference_optimize(
+        self,
+        request: PostTrainingRLHFRequest,
    ) -> None: ...

    # sends SSE stream of logs
-    @webmethod(route="/finetuning/job/logs")
-    def get_training_log_stream(self, job_uuid: str) -> FinetuningJobLogStream: ...
+    @webmethod(route="/post_training/job/logs")
+    def get_training_log_stream(self, job_uuid: str) -> PostTrainingJobLogStream: ...

-    @webmethod(route="/finetuning/job/status")
-    def get_training_job_status(self, job_uuid: str) -> FinetuningJobStatusResponse: ...
+    @webmethod(route="/post_training/job/status")
+    def get_training_job_status(
+        self, job_uuid: str
+    ) -> PostTrainingJobStatusResponse: ...

-    @webmethod(route="/finetuning/job/cancel")
+    @webmethod(route="/post_training/job/cancel")
    def cancel_training_job(self, job_uuid: str) -> None: ...

-    @webmethod(route="/finetuning/job/artifacts")
+    @webmethod(route="/post_training/job/artifacts")
    def get_training_job_artifacts(
        self, job_uuid: str
-    ) -> FinetuningJobArtifactsResponse: ...
+    ) -> PostTrainingJobArtifactsResponse: ...


 class LlamaStackEndpoints(
@ -487,7 +520,7 @@ class LlamaStackEndpoints(
    RewardScoring,
    SyntheticDataGeneration,
    Datasets,
-    Finetuning,
+    PostTraining,
    MemoryBanks,
 ): ...

--- a/source/openapi.html
+++ b/source/openapi.html
@ -299,7 +299,7 @@
                "parameters": []
            }
        },
-        "/finetuning/job/artifacts": {
+        "/post_training/job/artifacts": {
            "get": {
                "responses": {
                    "200": {
@ -307,14 +307,14 @@
                        "content": {
                            "application/json": {
                                "schema": {
-                                    "$ref": "#/components/schemas/FinetuningJobArtifactsResponse"
+                                    "$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
                                }
                            }
                        }
                    }
                },
                "tags": [
-                    "Finetuning"
+                    "PostTraining"
                ],
                "parameters": [
                    {
@ -328,7 +328,7 @@
                ]
            }
        },
-        "/finetuning/job/status": {
+        "/post_training/job/status": {
            "get": {
                "responses": {
                    "200": {
@ -336,14 +336,14 @@
                        "content": {
                            "application/json": {
                                "schema": {
-                                    "$ref": "#/components/schemas/FinetuningJobStatusResponse"
+                                    "$ref": "#/components/schemas/PostTrainingJobStatusResponse"
                                }
                            }
                        }
                    }
                },
                "tags": [
-                    "Finetuning"
+                    "PostTraining"
                ],
                "parameters": [
                    {
@ -357,7 +357,7 @@
                ]
            }
        },
-        "/finetuning/job/logs": {
+        "/post_training/job/logs": {
            "get": {
                "responses": {
                    "200": {
@ -365,14 +365,14 @@
                        "content": {
                            "application/json": {
                                "schema": {
-                                    "$ref": "#/components/schemas/FinetuningJobLogStream"
+                                    "$ref": "#/components/schemas/PostTrainingJobLogStream"
                                }
                            }
                        }
                    }
                },
                "tags": [
-                    "Finetuning"
+                    "PostTraining"
                ],
                "parameters": [
                    {
@ -664,6 +664,29 @@
                }
            }
        },
+        "/post_training/preference_optimize/": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    }
+                },
+                "tags": [
+                    "PostTraining"
+                ],
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/PostTrainingRLHFRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
        "/reward_scoring/score": {
            "post": {
                "responses": {
@ -694,7 +717,7 @@
                }
            }
        },
-        "/finetuning/text_generation/train": {
+        "/post_training/supervised_fine_tune/": {
            "post": {
                "responses": {
                    "200": {
@ -702,14 +725,14 @@
                    }
                },
                "tags": [
-                    "Finetuning"
+                    "PostTraining"
                ],
                "parameters": [],
                "requestBody": {
                    "content": {
                        "application/json": {
                            "schema": {
-                                "$ref": "#/components/schemas/FinetuningTrainRequest"
+                                "$ref": "#/components/schemas/PostTrainingSFTRequest"
                            }
                        }
                    },
@ -1697,7 +1720,7 @@
                    "name"
                ]
            },
-            "FinetuningJobArtifactsResponse": {
+            "PostTrainingJobArtifactsResponse": {
                "type": "object",
                "properties": {
                    "job_uuid": {
@ -1730,7 +1753,7 @@
                ],
                "title": "Artifacts of a finetuning job."
            },
-            "FinetuningJobStatusResponse": {
+            "PostTrainingJobStatusResponse": {
                "type": "object",
                "properties": {
                    "job_uuid": {
@ -1810,7 +1833,7 @@
                ],
                "title": "Status of a finetuning job."
            },
-            "FinetuningJobLogStream": {
+            "PostTrainingJobLogStream": {
                "type": "object",
                "properties": {
                    "job_uuid": {
@ -2672,6 +2695,191 @@
                ],
                "title": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
            },
+            "DPOAlignmentConfig": {
+                "type": "object",
+                "properties": {
+                    "reward_scale": {
+                        "type": "number"
+                    },
+                    "reward_clip": {
+                        "type": "number"
+                    },
+                    "epsilon": {
+                        "type": "number"
+                    },
+                    "gamma": {
+                        "type": "number"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "reward_scale",
+                    "reward_clip",
+                    "epsilon",
+                    "gamma"
+                ]
+            },
+            "OptimizerConfig": {
+                "type": "object",
+                "properties": {
+                    "optimizer_type": {
+                        "type": "string",
+                        "enum": [
+                            "adam",
+                            "adamw",
+                            "sgd"
+                        ]
+                    },
+                    "lr": {
+                        "type": "number"
+                    },
+                    "lr_min": {
+                        "type": "number"
+                    },
+                    "weight_decay": {
+                        "type": "number"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "optimizer_type",
+                    "lr",
+                    "lr_min",
+                    "weight_decay"
+                ]
+            },
+            "PostTrainingRLHFRequest": {
+                "type": "object",
+                "properties": {
+                    "job_uuid": {
+                        "type": "string"
+                    },
+                    "finetuned_model": {
+                        "$ref": "#/components/schemas/URL"
+                    },
+                    "dataset": {
+                        "$ref": "#/components/schemas/Dataset"
+                    },
+                    "validation_dataset": {
+                        "$ref": "#/components/schemas/Dataset"
+                    },
+                    "algorithm": {
+                        "type": "string",
+                        "enum": [
+                            "dpo"
+                        ]
+                    },
+                    "algorithm_config": {
+                        "$ref": "#/components/schemas/DPOAlignmentConfig"
+                    },
+                    "optimizer_config": {
+                        "$ref": "#/components/schemas/OptimizerConfig"
+                    },
+                    "training_config": {
+                        "$ref": "#/components/schemas/TrainingConfig"
+                    },
+                    "hyperparam_search_config": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        }
+                    },
+                    "logger_config": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "job_uuid",
+                    "finetuned_model",
+                    "dataset",
+                    "validation_dataset",
+                    "algorithm",
+                    "algorithm_config",
+                    "optimizer_config",
+                    "training_config",
+                    "hyperparam_search_config",
+                    "logger_config"
+                ],
+                "title": "Request to finetune a model."
+            },
+            "TrainingConfig": {
+                "type": "object",
+                "properties": {
+                    "n_epochs": {
+                        "type": "integer"
+                    },
+                    "batch_size": {
+                        "type": "integer"
+                    },
+                    "shuffle": {
+                        "type": "boolean"
+                    },
+                    "n_iters": {
+                        "type": "integer"
+                    },
+                    "enable_activation_checkpointing": {
+                        "type": "boolean"
+                    },
+                    "memory_efficient_fsdp_wrap": {
+                        "type": "boolean"
+                    },
+                    "fsdp_cpu_offload": {
+                        "type": "boolean"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "n_epochs",
+                    "batch_size",
+                    "shuffle",
+                    "n_iters",
+                    "enable_activation_checkpointing",
+                    "memory_efficient_fsdp_wrap",
+                    "fsdp_cpu_offload"
+                ]
+            },
            "RewardScoringRequest": {
                "type": "object",
                "properties": {
@ -2727,7 +2935,69 @@
                ],
                "title": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold."
            },
-            "FinetuningTrainRequest": {
+            "DoraFinetuningConfig": {
+                "type": "object",
+                "properties": {
+                    "lora_attn_modules": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        }
+                    },
+                    "apply_lora_to_mlp": {
+                        "type": "boolean"
+                    },
+                    "apply_lora_to_output": {
+                        "type": "boolean"
+                    },
+                    "rank": {
+                        "type": "integer"
+                    },
+                    "alpha": {
+                        "type": "integer"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "lora_attn_modules",
+                    "apply_lora_to_mlp",
+                    "apply_lora_to_output",
+                    "rank",
+                    "alpha"
+                ]
+            },
+            "LoraFinetuningConfig": {
+                "type": "object",
+                "properties": {
+                    "lora_attn_modules": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        }
+                    },
+                    "apply_lora_to_mlp": {
+                        "type": "boolean"
+                    },
+                    "apply_lora_to_output": {
+                        "type": "boolean"
+                    },
+                    "rank": {
+                        "type": "integer"
+                    },
+                    "alpha": {
+                        "type": "integer"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "lora_attn_modules",
+                    "apply_lora_to_mlp",
+                    "apply_lora_to_output",
+                    "rank",
+                    "alpha"
+                ]
+            },
+            "PostTrainingSFTRequest": {
                "type": "object",
                "properties": {
                    "job_uuid": {
@ -2761,66 +3031,10 @@
                                "$ref": "#/components/schemas/LoraFinetuningConfig"
                            },
                            {
-                                "type": "object",
-                                "properties": {
-                                    "lora_attn_modules": {
-                                        "type": "array",
-                                        "items": {
-                                            "type": "string"
-                                        }
-                                    },
-                                    "apply_lora_to_mlp": {
-                                        "type": "boolean"
-                                    },
-                                    "apply_lora_to_output": {
-                                        "type": "boolean"
-                                    },
-                                    "rank": {
-                                        "type": "integer"
-                                    },
-                                    "alpha": {
-                                        "type": "integer"
-                                    }
-                                },
-                                "additionalProperties": false,
-                                "required": [
-                                    "lora_attn_modules",
-                                    "apply_lora_to_mlp",
-                                    "apply_lora_to_output",
-                                    "rank",
-                                    "alpha"
-                                ]
+                                "$ref": "#/components/schemas/QLoraFinetuningConfig"
                            },
                            {
-                                "type": "object",
-                                "properties": {
-                                    "lora_attn_modules": {
-                                        "type": "array",
-                                        "items": {
-                                            "type": "string"
-                                        }
-                                    },
-                                    "apply_lora_to_mlp": {
-                                        "type": "boolean"
-                                    },
-                                    "apply_lora_to_output": {
-                                        "type": "boolean"
-                                    },
-                                    "rank": {
-                                        "type": "integer"
-                                    },
-                                    "alpha": {
-                                        "type": "integer"
-                                    }
-                                },
-                                "additionalProperties": false,
-                                "required": [
-                                    "lora_attn_modules",
-                                    "apply_lora_to_mlp",
-                                    "apply_lora_to_output",
-                                    "rank",
-                                    "alpha"
-                                ]
+                                "$ref": "#/components/schemas/DoraFinetuningConfig"
                            }
                        ]
                    },
@ -2896,7 +3110,7 @@
                ],
                "title": "Request to finetune a model."
            },
-            "LoraFinetuningConfig": {
+            "QLoraFinetuningConfig": {
                "type": "object",
                "properties": {
                    "lora_attn_modules": {
@ -2926,71 +3140,6 @@
                    "rank",
                    "alpha"
                ]
-            },
-            "OptimizerConfig": {
-                "type": "object",
-                "properties": {
-                    "optimizer_type": {
-                        "type": "string",
-                        "enum": [
-                            "adam",
-                            "adamw",
-                            "sgd"
-                        ]
-                    },
-                    "lr": {
-                        "type": "number"
-                    },
-                    "lr_min": {
-                        "type": "number"
-                    },
-                    "weight_decay": {
-                        "type": "number"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "optimizer_type",
-                    "lr",
-                    "lr_min",
-                    "weight_decay"
-                ]
-            },
-            "TrainingConfig": {
-                "type": "object",
-                "properties": {
-                    "n_epochs": {
-                        "type": "integer"
-                    },
-                    "batch_size": {
-                        "type": "integer"
-                    },
-                    "shuffle": {
-                        "type": "boolean"
-                    },
-                    "n_iters": {
-                        "type": "integer"
-                    },
-                    "enable_activation_checkpointing": {
-                        "type": "boolean"
-                    },
-                    "memory_efficient_fsdp_wrap": {
-                        "type": "boolean"
-                    },
-                    "fsdp_cpu_offload": {
-                        "type": "boolean"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "n_epochs",
-                    "batch_size",
-                    "shuffle",
-                    "n_iters",
-                    "enable_activation_checkpointing",
-                    "memory_efficient_fsdp_wrap",
-                    "fsdp_cpu_offload"
-                ]
            }
        },
        "responses": {}
@ -3001,27 +3150,27 @@
        }
    ],
    "tags": [
-        {
-            "name": "RewardScoring"
-        },
-        {
-            "name": "MemoryBanks"
-        },
-        {
-            "name": "SyntheticDataGeneration"
-        },
-        {
-            "name": "Finetuning"
-        },
        {
            "name": "AgenticSystem"
        },
+        {
+            "name": "RewardScoring"
+        },
        {
            "name": "Inference"
        },
+        {
+            "name": "SyntheticDataGeneration"
+        },
        {
            "name": "Datasets"
        },
+        {
+            "name": "PostTraining"
+        },
+        {
+            "name": "MemoryBanks"
+        },
        {
            "name": "ShieldConfig",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ShieldConfig\" />"
@ -3075,16 +3224,16 @@
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryBank\" />"
        },
        {
-            "name": "FinetuningJobArtifactsResponse",
-            "description": "Artifacts of a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningJobArtifactsResponse\" />"
+            "name": "PostTrainingJobArtifactsResponse",
+            "description": "Artifacts of a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingJobArtifactsResponse\" />"
        },
        {
-            "name": "FinetuningJobStatusResponse",
-            "description": "Status of a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningJobStatusResponse\" />"
+            "name": "PostTrainingJobStatusResponse",
+            "description": "Status of a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingJobStatusResponse\" />"
        },
        {
-            "name": "FinetuningJobLogStream",
-            "description": "Stream of logs from a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningJobLogStream\" />"
+            "name": "PostTrainingJobLogStream",
+            "description": "Stream of logs from a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingJobLogStream\" />"
        },
        {
            "name": "BatchChatCompletionRequest",
@ -3138,6 +3287,22 @@
            "name": "SyntheticDataGenerationResponse",
            "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/SyntheticDataGenerationResponse\" />"
        },
+        {
+            "name": "DPOAlignmentConfig",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/DPOAlignmentConfig\" />"
+        },
+        {
+            "name": "OptimizerConfig",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/OptimizerConfig\" />"
+        },
+        {
+            "name": "PostTrainingRLHFRequest",
+            "description": "Request to finetune a model.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingRLHFRequest\" />"
+        },
+        {
+            "name": "TrainingConfig",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/TrainingConfig\" />"
+        },
        {
            "name": "RewardScoringRequest",
            "description": "Request to score a reward function. A list of prompts and a list of responses per prompt.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringRequest\" />"
@ -3147,20 +3312,20 @@
            "description": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringResponse\" />"
        },
        {
-            "name": "FinetuningTrainRequest",
-            "description": "Request to finetune a model.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningTrainRequest\" />"
+            "name": "DoraFinetuningConfig",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/DoraFinetuningConfig\" />"
        },
        {
            "name": "LoraFinetuningConfig",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/LoraFinetuningConfig\" />"
        },
        {
-            "name": "OptimizerConfig",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/OptimizerConfig\" />"
+            "name": "PostTrainingSFTRequest",
+            "description": "Request to finetune a model.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingSFTRequest\" />"
        },
        {
-            "name": "TrainingConfig",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/TrainingConfig\" />"
+            "name": "QLoraFinetuningConfig",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/QLoraFinetuningConfig\" />"
        }
    ],
    "x-tagGroups": [
@ -3169,9 +3334,9 @@
            "tags": [
                "AgenticSystem",
                "Datasets",
-                "Finetuning",
                "Inference",
                "MemoryBanks",
+                "PostTraining",
                "RewardScoring",
                "SyntheticDataGeneration"
            ]
@ -3195,18 +3360,22 @@
                "CompletionResponse",
                "CompletionResponseStreamChunk",
                "CreateDatasetRequest",
+                "DPOAlignmentConfig",
                "Dataset",
                "Dialog",
-                "FinetuningJobArtifactsResponse",
-                "FinetuningJobLogStream",
-                "FinetuningJobStatusResponse",
-                "FinetuningTrainRequest",
+                "DoraFinetuningConfig",
                "KScoredPromptGenerations",
                "LoraFinetuningConfig",
                "MemoryBank",
                "Message",
                "MessageScore",
                "OptimizerConfig",
+                "PostTrainingJobArtifactsResponse",
+                "PostTrainingJobLogStream",
+                "PostTrainingJobStatusResponse",
+                "PostTrainingRLHFRequest",
+                "PostTrainingSFTRequest",
+                "QLoraFinetuningConfig",
                "RewardScoringRequest",
                "RewardScoringResponse",
                "ShieldConfig",
--- a/source/openapi.yaml
+++ b/source/openapi.yaml
@ -879,6 +879,23 @@ components:
      - dataset
      title: Request to create a dataset.
      type: object
+    DPOAlignmentConfig:
+      additionalProperties: false
+      properties:
+        epsilon:
+          type: number
+        gamma:
+          type: number
+        reward_clip:
+          type: number
+        reward_scale:
+          type: number
+      required:
+      - reward_scale
+      - reward_clip
+      - epsilon
+      - gamma
+      type: object
    Dataset:
      additionalProperties: false
      properties:
@ -923,195 +940,27 @@ components:
      - message
      - message_history
      type: object
-    FinetuningJobArtifactsResponse:
+    DoraFinetuningConfig:
      additionalProperties: false
      properties:
-        checkpoints:
-          items:
-            additionalProperties: false
-            properties:
-              iters:
-                type: integer
-              path:
-                $ref: '#/components/schemas/URL'
-            required:
-            - iters
-            - path
-            type: object
-          type: array
-        job_uuid:
-          type: string
-      required:
-      - job_uuid
-      - checkpoints
-      title: Artifacts of a finetuning job.
-      type: object
-    FinetuningJobLogStream:
-      additionalProperties: false
-      properties:
-        job_uuid:
-          type: string
-        log_lines:
+        alpha:
+          type: integer
+        apply_lora_to_mlp:
+          type: boolean
+        apply_lora_to_output:
+          type: boolean
+        lora_attn_modules:
          items:
            type: string
          type: array
+        rank:
+          type: integer
      required:
-      - job_uuid
-      - log_lines
-      title: Stream of logs from a finetuning job.
-      type: object
-    FinetuningJobStatusResponse:
-      additionalProperties: false
-      properties:
-        checkpoints:
-          items:
-            additionalProperties: false
-            properties:
-              iters:
-                type: integer
-              path:
-                $ref: '#/components/schemas/URL'
-            required:
-            - iters
-            - path
-            type: object
-          type: array
-        completed_at:
-          format: date-time
-          type: string
-        job_uuid:
-          type: string
-        resources_allocated:
-          additionalProperties:
-            oneOf:
-            - type: 'null'
-            - type: boolean
-            - type: number
-            - type: string
-            - type: array
-            - type: object
-          type: object
-        scheduled_at:
-          format: date-time
-          type: string
-        started_at:
-          format: date-time
-          type: string
-        status:
-          enum:
-          - running
-          - completed
-          - failed
-          - scheduled
-          type: string
-      required:
-      - job_uuid
-      - status
-      - checkpoints
-      title: Status of a finetuning job.
-      type: object
-    FinetuningTrainRequest:
-      additionalProperties: false
-      properties:
-        algorithm:
-          enum:
-          - full
-          - lora
-          - qlora
-          - dora
-          type: string
-        algorithm_config:
-          oneOf:
-          - $ref: '#/components/schemas/LoraFinetuningConfig'
-          - additionalProperties: false
-            properties:
-              alpha:
-                type: integer
-              apply_lora_to_mlp:
-                type: boolean
-              apply_lora_to_output:
-                type: boolean
-              lora_attn_modules:
-                items:
-                  type: string
-                type: array
-              rank:
-                type: integer
-            required:
-            - lora_attn_modules
-            - apply_lora_to_mlp
-            - apply_lora_to_output
-            - rank
-            - alpha
-            type: object
-          - additionalProperties: false
-            properties:
-              alpha:
-                type: integer
-              apply_lora_to_mlp:
-                type: boolean
-              apply_lora_to_output:
-                type: boolean
-              lora_attn_modules:
-                items:
-                  type: string
-                type: array
-              rank:
-                type: integer
-            required:
-            - lora_attn_modules
-            - apply_lora_to_mlp
-            - apply_lora_to_output
-            - rank
-            - alpha
-            type: object
-        dataset:
-          $ref: '#/components/schemas/Dataset'
-        hyperparam_search_config:
-          additionalProperties:
-            oneOf:
-            - type: 'null'
-            - type: boolean
-            - type: number
-            - type: string
-            - type: array
-            - type: object
-          type: object
-        job_uuid:
-          type: string
-        logger_config:
-          additionalProperties:
-            oneOf:
-            - type: 'null'
-            - type: boolean
-            - type: number
-            - type: string
-            - type: array
-            - type: object
-          type: object
-        model:
-          enum:
-          - llama3_8b
-          - llama3_70b
-          type: string
-        optimizer_config:
-          $ref: '#/components/schemas/OptimizerConfig'
-        training_config:
-          $ref: '#/components/schemas/TrainingConfig'
-        validation_dataset:
-          $ref: '#/components/schemas/Dataset'
-      required:
-      - job_uuid
-      - model
-      - dataset
-      - validation_dataset
-      - algorithm
-      - algorithm_config
-      - optimizer_config
-      - training_config
-      - hyperparam_search_config
-      - logger_config
-      title: Request to finetune a model.
+      - lora_attn_modules
+      - apply_lora_to_mlp
+      - apply_lora_to_output
+      - rank
+      - alpha
      type: object
    KScoredPromptGenerations:
      additionalProperties: false
@ -1259,6 +1108,232 @@ components:
      - lr_min
      - weight_decay
      type: object
+    PostTrainingJobArtifactsResponse:
+      additionalProperties: false
+      properties:
+        checkpoints:
+          items:
+            additionalProperties: false
+            properties:
+              iters:
+                type: integer
+              path:
+                $ref: '#/components/schemas/URL'
+            required:
+            - iters
+            - path
+            type: object
+          type: array
+        job_uuid:
+          type: string
+      required:
+      - job_uuid
+      - checkpoints
+      title: Artifacts of a finetuning job.
+      type: object
+    PostTrainingJobLogStream:
+      additionalProperties: false
+      properties:
+        job_uuid:
+          type: string
+        log_lines:
+          items:
+            type: string
+          type: array
+      required:
+      - job_uuid
+      - log_lines
+      title: Stream of logs from a finetuning job.
+      type: object
+    PostTrainingJobStatusResponse:
+      additionalProperties: false
+      properties:
+        checkpoints:
+          items:
+            additionalProperties: false
+            properties:
+              iters:
+                type: integer
+              path:
+                $ref: '#/components/schemas/URL'
+            required:
+            - iters
+            - path
+            type: object
+          type: array
+        completed_at:
+          format: date-time
+          type: string
+        job_uuid:
+          type: string
+        resources_allocated:
+          additionalProperties:
+            oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+          type: object
+        scheduled_at:
+          format: date-time
+          type: string
+        started_at:
+          format: date-time
+          type: string
+        status:
+          enum:
+          - running
+          - completed
+          - failed
+          - scheduled
+          type: string
+      required:
+      - job_uuid
+      - status
+      - checkpoints
+      title: Status of a finetuning job.
+      type: object
+    PostTrainingRLHFRequest:
+      additionalProperties: false
+      properties:
+        algorithm:
+          enum:
+          - dpo
+          type: string
+        algorithm_config:
+          $ref: '#/components/schemas/DPOAlignmentConfig'
+        dataset:
+          $ref: '#/components/schemas/Dataset'
+        finetuned_model:
+          $ref: '#/components/schemas/URL'
+        hyperparam_search_config:
+          additionalProperties:
+            oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+          type: object
+        job_uuid:
+          type: string
+        logger_config:
+          additionalProperties:
+            oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+          type: object
+        optimizer_config:
+          $ref: '#/components/schemas/OptimizerConfig'
+        training_config:
+          $ref: '#/components/schemas/TrainingConfig'
+        validation_dataset:
+          $ref: '#/components/schemas/Dataset'
+      required:
+      - job_uuid
+      - finetuned_model
+      - dataset
+      - validation_dataset
+      - algorithm
+      - algorithm_config
+      - optimizer_config
+      - training_config
+      - hyperparam_search_config
+      - logger_config
+      title: Request to finetune a model.
+      type: object
+    PostTrainingSFTRequest:
+      additionalProperties: false
+      properties:
+        algorithm:
+          enum:
+          - full
+          - lora
+          - qlora
+          - dora
+          type: string
+        algorithm_config:
+          oneOf:
+          - $ref: '#/components/schemas/LoraFinetuningConfig'
+          - $ref: '#/components/schemas/QLoraFinetuningConfig'
+          - $ref: '#/components/schemas/DoraFinetuningConfig'
+        dataset:
+          $ref: '#/components/schemas/Dataset'
+        hyperparam_search_config:
+          additionalProperties:
+            oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+          type: object
+        job_uuid:
+          type: string
+        logger_config:
+          additionalProperties:
+            oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+          type: object
+        model:
+          enum:
+          - llama3_8b
+          - llama3_70b
+          type: string
+        optimizer_config:
+          $ref: '#/components/schemas/OptimizerConfig'
+        training_config:
+          $ref: '#/components/schemas/TrainingConfig'
+        validation_dataset:
+          $ref: '#/components/schemas/Dataset'
+      required:
+      - job_uuid
+      - model
+      - dataset
+      - validation_dataset
+      - algorithm
+      - algorithm_config
+      - optimizer_config
+      - training_config
+      - hyperparam_search_config
+      - logger_config
+      title: Request to finetune a model.
+      type: object
+    QLoraFinetuningConfig:
+      additionalProperties: false
+      properties:
+        alpha:
+          type: integer
+        apply_lora_to_mlp:
+          type: boolean
+        apply_lora_to_output:
+          type: boolean
+        lora_attn_modules:
+          items:
+            type: string
+          type: array
+        rank:
+          type: integer
+      required:
+      - lora_attn_modules
+      - apply_lora_to_mlp
+      - apply_lora_to_output
+      - rank
+      - alpha
+      type: object
    RewardScoringRequest:
      additionalProperties: false
      properties:
@ -1581,71 +1656,6 @@ paths:
          description: OK
      tags:
      - Datasets
-  /finetuning/job/artifacts:
-    get:
-      parameters:
-      - in: query
-        name: job_uuid
-        required: true
-        schema:
-          type: string
-      responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/FinetuningJobArtifactsResponse'
-          description: OK
-      tags:
-      - Finetuning
-  /finetuning/job/logs:
-    get:
-      parameters:
-      - in: query
-        name: job_uuid
-        required: true
-        schema:
-          type: string
-      responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/FinetuningJobLogStream'
-          description: OK
-      tags:
-      - Finetuning
-  /finetuning/job/status:
-    get:
-      parameters:
-      - in: query
-        name: job_uuid
-        required: true
-        schema:
-          type: string
-      responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/FinetuningJobStatusResponse'
-          description: OK
-      tags:
-      - Finetuning
-  /finetuning/text_generation/train:
-    post:
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/FinetuningTrainRequest'
-        required: true
-      responses:
-        '200':
-          description: OK
-      tags:
-      - Finetuning
  /memory_banks/create:
    post:
      parameters:
@ -1787,6 +1797,85 @@ paths:
          description: OK
      tags:
      - MemoryBanks
+  /post_training/job/artifacts:
+    get:
+      parameters:
+      - in: query
+        name: job_uuid
+        required: true
+        schema:
+          type: string
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
+          description: OK
+      tags:
+      - PostTraining
+  /post_training/job/logs:
+    get:
+      parameters:
+      - in: query
+        name: job_uuid
+        required: true
+        schema:
+          type: string
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/PostTrainingJobLogStream'
+          description: OK
+      tags:
+      - PostTraining
+  /post_training/job/status:
+    get:
+      parameters:
+      - in: query
+        name: job_uuid
+        required: true
+        schema:
+          type: string
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/PostTrainingJobStatusResponse'
+          description: OK
+      tags:
+      - PostTraining
+  /post_training/preference_optimize/:
+    post:
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/PostTrainingRLHFRequest'
+        required: true
+      responses:
+        '200':
+          description: OK
+      tags:
+      - PostTraining
+  /post_training/supervised_fine_tune/:
+    post:
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/PostTrainingSFTRequest'
+        required: true
+      responses:
+        '200':
+          description: OK
+      tags:
+      - PostTraining
  /reward_scoring/score:
    post:
      parameters: []
@ -1828,13 +1917,13 @@ security:
 servers:
 - url: http://llama.meta.com
 tags:
- name: RewardScoring
- name: MemoryBanks
- name: SyntheticDataGeneration
- name: Finetuning
 - name: AgenticSystem
+- name: RewardScoring
 - name: Inference
+- name: SyntheticDataGeneration
 - name: Datasets
+- name: PostTraining
+- name: MemoryBanks
 - description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
  name: ShieldConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
@ -1888,20 +1977,20 @@ tags:
 - description: 'Artifacts of a finetuning job.


-    <SchemaDefinition schemaRef="#/components/schemas/FinetuningJobArtifactsResponse"
+    <SchemaDefinition schemaRef="#/components/schemas/PostTrainingJobArtifactsResponse"
    />'
-  name: FinetuningJobArtifactsResponse
+  name: PostTrainingJobArtifactsResponse
 - description: 'Status of a finetuning job.


-    <SchemaDefinition schemaRef="#/components/schemas/FinetuningJobStatusResponse"
+    <SchemaDefinition schemaRef="#/components/schemas/PostTrainingJobStatusResponse"
    />'
-  name: FinetuningJobStatusResponse
+  name: PostTrainingJobStatusResponse
 - description: 'Stream of logs from a finetuning job.


-    <SchemaDefinition schemaRef="#/components/schemas/FinetuningJobLogStream" />'
-  name: FinetuningJobLogStream
+    <SchemaDefinition schemaRef="#/components/schemas/PostTrainingJobLogStream" />'
+  name: PostTrainingJobLogStream
 - description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
    />
  name: BatchChatCompletionRequest
@ -1961,6 +2050,19 @@ tags:
    <SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationResponse"
    />'
  name: SyntheticDataGenerationResponse
+- description: <SchemaDefinition schemaRef="#/components/schemas/DPOAlignmentConfig"
+    />
+  name: DPOAlignmentConfig
+- description: <SchemaDefinition schemaRef="#/components/schemas/OptimizerConfig"
+    />
+  name: OptimizerConfig
+- description: 'Request to finetune a model.
+
+
+    <SchemaDefinition schemaRef="#/components/schemas/PostTrainingRLHFRequest" />'
+  name: PostTrainingRLHFRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/TrainingConfig" />
+  name: TrainingConfig
 - description: 'Request to score a reward function. A list of prompts and a list of
    responses per prompt.

@ -1973,27 +2075,28 @@ tags:

    <SchemaDefinition schemaRef="#/components/schemas/RewardScoringResponse" />'
  name: RewardScoringResponse
- description: 'Request to finetune a model.
-
-
-    <SchemaDefinition schemaRef="#/components/schemas/FinetuningTrainRequest" />'
-  name: FinetuningTrainRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/DoraFinetuningConfig"
+    />
+  name: DoraFinetuningConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/LoraFinetuningConfig"
    />
  name: LoraFinetuningConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/OptimizerConfig"
+- description: 'Request to finetune a model.
+
+
+    <SchemaDefinition schemaRef="#/components/schemas/PostTrainingSFTRequest" />'
+  name: PostTrainingSFTRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/QLoraFinetuningConfig"
    />
-  name: OptimizerConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/TrainingConfig" />
-  name: TrainingConfig
+  name: QLoraFinetuningConfig
 x-tagGroups:
 - name: Operations
  tags:
  - AgenticSystem
  - Datasets
-  - Finetuning
  - Inference
  - MemoryBanks
+  - PostTraining
  - RewardScoring
  - SyntheticDataGeneration
 - name: Types
@ -2014,18 +2117,22 @@ x-tagGroups:
  - CompletionResponse
  - CompletionResponseStreamChunk
  - CreateDatasetRequest
+  - DPOAlignmentConfig
  - Dataset
  - Dialog
-  - FinetuningJobArtifactsResponse
-  - FinetuningJobLogStream
-  - FinetuningJobStatusResponse
-  - FinetuningTrainRequest
+  - DoraFinetuningConfig
  - KScoredPromptGenerations
  - LoraFinetuningConfig
  - MemoryBank
  - Message
  - MessageScore
  - OptimizerConfig
+  - PostTrainingJobArtifactsResponse
+  - PostTrainingJobLogStream
+  - PostTrainingJobStatusResponse
+  - PostTrainingRLHFRequest
+  - PostTrainingSFTRequest
+  - QLoraFinetuningConfig
  - RewardScoringRequest
  - RewardScoringResponse
  - ShieldConfig
--- a/source/post_training_types.py
+++ b/source/post_training_types.py
@ -72,11 +72,13 @@ class LoraFinetuningConfig:
    alpha: int


+@json_schema_type
@dataclass
 class QLoraFinetuningConfig(LoraFinetuningConfig):
    pass


+@json_schema_type
@dataclass
 class DoraFinetuningConfig(LoraFinetuningConfig):
    pass
@ -84,14 +86,14 @@ class DoraFinetuningConfig(LoraFinetuningConfig):

@json_schema_type
@dataclass
-class FinetuningJobLogStream:
+class PostTrainingJobLogStream:
    """Stream of logs from a finetuning job."""

    job_uuid: str
    log_lines: List[str]


-class FinetuningJobStatus(Enum):
+class PostTrainingJobStatus(Enum):
    running = "running"
    completed = "completed"
    failed = "failed"
@ -102,3 +104,16 @@ class FinetuningJobStatus(Enum):
 class Checkpoint:
    iters: int
    path: URL
+
+
+class RLHFAlgorithm(Enum):
+    dpo = "dpo"
+
+
+@json_schema_type
+@dataclass
+class DPOAlignmentConfig:
+    reward_scale: float
+    reward_clip: float
+    epsilon: float
+    gamma: float