mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 15:23:51 +00:00
added DPO
This commit is contained in:
parent
7cade3acc3
commit
631328f556
4 changed files with 796 additions and 472 deletions
|
@ -12,19 +12,6 @@ from agentic_system_types import (
|
||||||
SafetyViolation,
|
SafetyViolation,
|
||||||
)
|
)
|
||||||
|
|
||||||
from finetuning_types import (
|
|
||||||
Checkpoint,
|
|
||||||
Dataset,
|
|
||||||
DoraFinetuningConfig,
|
|
||||||
FinetuningAlgorithm,
|
|
||||||
FinetuningJobLogStream,
|
|
||||||
FinetuningJobStatus,
|
|
||||||
LoraFinetuningConfig,
|
|
||||||
OptimizerConfig,
|
|
||||||
QLoraFinetuningConfig,
|
|
||||||
TrainingConfig,
|
|
||||||
)
|
|
||||||
|
|
||||||
from model_types import (
|
from model_types import (
|
||||||
BuiltinTool,
|
BuiltinTool,
|
||||||
Content,
|
Content,
|
||||||
|
@ -42,6 +29,21 @@ from model_types import (
|
||||||
URL,
|
URL,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from post_training_types import (
|
||||||
|
Checkpoint,
|
||||||
|
Dataset,
|
||||||
|
DoraFinetuningConfig,
|
||||||
|
DPOAlignmentConfig,
|
||||||
|
FinetuningAlgorithm,
|
||||||
|
LoraFinetuningConfig,
|
||||||
|
OptimizerConfig,
|
||||||
|
PostTrainingJobLogStream,
|
||||||
|
PostTrainingJobStatus,
|
||||||
|
QLoraFinetuningConfig,
|
||||||
|
RLHFAlgorithm,
|
||||||
|
TrainingConfig,
|
||||||
|
)
|
||||||
|
|
||||||
from pyopenapi import Info, Options, Server, Specification, webmethod
|
from pyopenapi import Info, Options, Server, Specification, webmethod
|
||||||
from strong_typing.schema import json_schema_type
|
from strong_typing.schema import json_schema_type
|
||||||
|
|
||||||
|
@ -408,7 +410,7 @@ class Datasets(Protocol):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
@dataclass
|
@dataclass
|
||||||
class FinetuningTrainRequest:
|
class PostTrainingSFTRequest:
|
||||||
"""Request to finetune a model."""
|
"""Request to finetune a model."""
|
||||||
|
|
||||||
job_uuid: str
|
job_uuid: str
|
||||||
|
@ -432,11 +434,34 @@ class FinetuningTrainRequest:
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
@dataclass
|
@dataclass
|
||||||
class FinetuningJobStatusResponse:
|
class PostTrainingRLHFRequest:
|
||||||
|
"""Request to finetune a model."""
|
||||||
|
|
||||||
|
job_uuid: str
|
||||||
|
|
||||||
|
finetuned_model: URL
|
||||||
|
|
||||||
|
dataset: Dataset
|
||||||
|
validation_dataset: Dataset
|
||||||
|
|
||||||
|
algorithm: RLHFAlgorithm
|
||||||
|
algorithm_config: Union[DPOAlignmentConfig]
|
||||||
|
|
||||||
|
optimizer_config: OptimizerConfig
|
||||||
|
training_config: TrainingConfig
|
||||||
|
|
||||||
|
# TODO: define these
|
||||||
|
hyperparam_search_config: Dict[str, Any]
|
||||||
|
logger_config: Dict[str, Any]
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
|
||||||
|
@dataclass
|
||||||
|
class PostTrainingJobStatusResponse:
|
||||||
"""Status of a finetuning job."""
|
"""Status of a finetuning job."""
|
||||||
|
|
||||||
job_uuid: str
|
job_uuid: str
|
||||||
status: FinetuningJobStatus
|
status: PostTrainingJobStatus
|
||||||
|
|
||||||
scheduled_at: Optional[datetime] = None
|
scheduled_at: Optional[datetime] = None
|
||||||
started_at: Optional[datetime] = None
|
started_at: Optional[datetime] = None
|
||||||
|
@ -449,7 +474,7 @@ class FinetuningJobStatusResponse:
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
@dataclass
|
@dataclass
|
||||||
class FinetuningJobArtifactsResponse:
|
class PostTrainingJobArtifactsResponse:
|
||||||
"""Artifacts of a finetuning job."""
|
"""Artifacts of a finetuning job."""
|
||||||
|
|
||||||
job_uuid: str
|
job_uuid: str
|
||||||
|
@ -458,27 +483,35 @@ class FinetuningJobArtifactsResponse:
|
||||||
# TODO(ashwin): metrics, evals
|
# TODO(ashwin): metrics, evals
|
||||||
|
|
||||||
|
|
||||||
class Finetuning(Protocol):
|
class PostTraining(Protocol):
|
||||||
@webmethod(route="/finetuning/text_generation/train")
|
@webmethod(route="/post_training/supervised_fine_tune/")
|
||||||
def post_train(
|
def post_supervised_fine_tune(
|
||||||
self,
|
self,
|
||||||
request: FinetuningTrainRequest,
|
request: PostTrainingSFTRequest,
|
||||||
|
) -> None: ...
|
||||||
|
|
||||||
|
@webmethod(route="/post_training/preference_optimize/")
|
||||||
|
def post_preference_optimize(
|
||||||
|
self,
|
||||||
|
request: PostTrainingRLHFRequest,
|
||||||
) -> None: ...
|
) -> None: ...
|
||||||
|
|
||||||
# sends SSE stream of logs
|
# sends SSE stream of logs
|
||||||
@webmethod(route="/finetuning/job/logs")
|
@webmethod(route="/post_training/job/logs")
|
||||||
def get_training_log_stream(self, job_uuid: str) -> FinetuningJobLogStream: ...
|
def get_training_log_stream(self, job_uuid: str) -> PostTrainingJobLogStream: ...
|
||||||
|
|
||||||
@webmethod(route="/finetuning/job/status")
|
@webmethod(route="/post_training/job/status")
|
||||||
def get_training_job_status(self, job_uuid: str) -> FinetuningJobStatusResponse: ...
|
def get_training_job_status(
|
||||||
|
self, job_uuid: str
|
||||||
|
) -> PostTrainingJobStatusResponse: ...
|
||||||
|
|
||||||
@webmethod(route="/finetuning/job/cancel")
|
@webmethod(route="/post_training/job/cancel")
|
||||||
def cancel_training_job(self, job_uuid: str) -> None: ...
|
def cancel_training_job(self, job_uuid: str) -> None: ...
|
||||||
|
|
||||||
@webmethod(route="/finetuning/job/artifacts")
|
@webmethod(route="/post_training/job/artifacts")
|
||||||
def get_training_job_artifacts(
|
def get_training_job_artifacts(
|
||||||
self, job_uuid: str
|
self, job_uuid: str
|
||||||
) -> FinetuningJobArtifactsResponse: ...
|
) -> PostTrainingJobArtifactsResponse: ...
|
||||||
|
|
||||||
|
|
||||||
class LlamaStackEndpoints(
|
class LlamaStackEndpoints(
|
||||||
|
@ -487,7 +520,7 @@ class LlamaStackEndpoints(
|
||||||
RewardScoring,
|
RewardScoring,
|
||||||
SyntheticDataGeneration,
|
SyntheticDataGeneration,
|
||||||
Datasets,
|
Datasets,
|
||||||
Finetuning,
|
PostTraining,
|
||||||
MemoryBanks,
|
MemoryBanks,
|
||||||
): ...
|
): ...
|
||||||
|
|
||||||
|
|
|
@ -299,7 +299,7 @@
|
||||||
"parameters": []
|
"parameters": []
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/finetuning/job/artifacts": {
|
"/post_training/job/artifacts": {
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -307,14 +307,14 @@
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/components/schemas/FinetuningJobArtifactsResponse"
|
"$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"tags": [
|
"tags": [
|
||||||
"Finetuning"
|
"PostTraining"
|
||||||
],
|
],
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
|
@ -328,7 +328,7 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/finetuning/job/status": {
|
"/post_training/job/status": {
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -336,14 +336,14 @@
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/components/schemas/FinetuningJobStatusResponse"
|
"$ref": "#/components/schemas/PostTrainingJobStatusResponse"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"tags": [
|
"tags": [
|
||||||
"Finetuning"
|
"PostTraining"
|
||||||
],
|
],
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
|
@ -357,7 +357,7 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/finetuning/job/logs": {
|
"/post_training/job/logs": {
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -365,14 +365,14 @@
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/components/schemas/FinetuningJobLogStream"
|
"$ref": "#/components/schemas/PostTrainingJobLogStream"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"tags": [
|
"tags": [
|
||||||
"Finetuning"
|
"PostTraining"
|
||||||
],
|
],
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
|
@ -664,6 +664,29 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/post_training/preference_optimize/": {
|
||||||
|
"post": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "OK"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"PostTraining"
|
||||||
|
],
|
||||||
|
"parameters": [],
|
||||||
|
"requestBody": {
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/PostTrainingRLHFRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/reward_scoring/score": {
|
"/reward_scoring/score": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
|
@ -694,7 +717,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/finetuning/text_generation/train": {
|
"/post_training/supervised_fine_tune/": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -702,14 +725,14 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"tags": [
|
"tags": [
|
||||||
"Finetuning"
|
"PostTraining"
|
||||||
],
|
],
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/components/schemas/FinetuningTrainRequest"
|
"$ref": "#/components/schemas/PostTrainingSFTRequest"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -1697,7 +1720,7 @@
|
||||||
"name"
|
"name"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"FinetuningJobArtifactsResponse": {
|
"PostTrainingJobArtifactsResponse": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"job_uuid": {
|
"job_uuid": {
|
||||||
|
@ -1730,7 +1753,7 @@
|
||||||
],
|
],
|
||||||
"title": "Artifacts of a finetuning job."
|
"title": "Artifacts of a finetuning job."
|
||||||
},
|
},
|
||||||
"FinetuningJobStatusResponse": {
|
"PostTrainingJobStatusResponse": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"job_uuid": {
|
"job_uuid": {
|
||||||
|
@ -1810,7 +1833,7 @@
|
||||||
],
|
],
|
||||||
"title": "Status of a finetuning job."
|
"title": "Status of a finetuning job."
|
||||||
},
|
},
|
||||||
"FinetuningJobLogStream": {
|
"PostTrainingJobLogStream": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"job_uuid": {
|
"job_uuid": {
|
||||||
|
@ -2672,6 +2695,191 @@
|
||||||
],
|
],
|
||||||
"title": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
|
"title": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
|
||||||
},
|
},
|
||||||
|
"DPOAlignmentConfig": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"reward_scale": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"reward_clip": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"epsilon": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"gamma": {
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"reward_scale",
|
||||||
|
"reward_clip",
|
||||||
|
"epsilon",
|
||||||
|
"gamma"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"OptimizerConfig": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"optimizer_type": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"adam",
|
||||||
|
"adamw",
|
||||||
|
"sgd"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"lr": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"lr_min": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"weight_decay": {
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"optimizer_type",
|
||||||
|
"lr",
|
||||||
|
"lr_min",
|
||||||
|
"weight_decay"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"PostTrainingRLHFRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"job_uuid": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"finetuned_model": {
|
||||||
|
"$ref": "#/components/schemas/URL"
|
||||||
|
},
|
||||||
|
"dataset": {
|
||||||
|
"$ref": "#/components/schemas/Dataset"
|
||||||
|
},
|
||||||
|
"validation_dataset": {
|
||||||
|
"$ref": "#/components/schemas/Dataset"
|
||||||
|
},
|
||||||
|
"algorithm": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"dpo"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"algorithm_config": {
|
||||||
|
"$ref": "#/components/schemas/DPOAlignmentConfig"
|
||||||
|
},
|
||||||
|
"optimizer_config": {
|
||||||
|
"$ref": "#/components/schemas/OptimizerConfig"
|
||||||
|
},
|
||||||
|
"training_config": {
|
||||||
|
"$ref": "#/components/schemas/TrainingConfig"
|
||||||
|
},
|
||||||
|
"hyperparam_search_config": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "array"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"logger_config": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "array"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"job_uuid",
|
||||||
|
"finetuned_model",
|
||||||
|
"dataset",
|
||||||
|
"validation_dataset",
|
||||||
|
"algorithm",
|
||||||
|
"algorithm_config",
|
||||||
|
"optimizer_config",
|
||||||
|
"training_config",
|
||||||
|
"hyperparam_search_config",
|
||||||
|
"logger_config"
|
||||||
|
],
|
||||||
|
"title": "Request to finetune a model."
|
||||||
|
},
|
||||||
|
"TrainingConfig": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"n_epochs": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"batch_size": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"shuffle": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"n_iters": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"enable_activation_checkpointing": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"memory_efficient_fsdp_wrap": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"fsdp_cpu_offload": {
|
||||||
|
"type": "boolean"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"n_epochs",
|
||||||
|
"batch_size",
|
||||||
|
"shuffle",
|
||||||
|
"n_iters",
|
||||||
|
"enable_activation_checkpointing",
|
||||||
|
"memory_efficient_fsdp_wrap",
|
||||||
|
"fsdp_cpu_offload"
|
||||||
|
]
|
||||||
|
},
|
||||||
"RewardScoringRequest": {
|
"RewardScoringRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -2727,7 +2935,69 @@
|
||||||
],
|
],
|
||||||
"title": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold."
|
"title": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold."
|
||||||
},
|
},
|
||||||
"FinetuningTrainRequest": {
|
"DoraFinetuningConfig": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"lora_attn_modules": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"apply_lora_to_mlp": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"apply_lora_to_output": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"rank": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"alpha": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"lora_attn_modules",
|
||||||
|
"apply_lora_to_mlp",
|
||||||
|
"apply_lora_to_output",
|
||||||
|
"rank",
|
||||||
|
"alpha"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"LoraFinetuningConfig": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"lora_attn_modules": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"apply_lora_to_mlp": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"apply_lora_to_output": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"rank": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"alpha": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"lora_attn_modules",
|
||||||
|
"apply_lora_to_mlp",
|
||||||
|
"apply_lora_to_output",
|
||||||
|
"rank",
|
||||||
|
"alpha"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"PostTrainingSFTRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"job_uuid": {
|
"job_uuid": {
|
||||||
|
@ -2761,66 +3031,10 @@
|
||||||
"$ref": "#/components/schemas/LoraFinetuningConfig"
|
"$ref": "#/components/schemas/LoraFinetuningConfig"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "object",
|
"$ref": "#/components/schemas/QLoraFinetuningConfig"
|
||||||
"properties": {
|
|
||||||
"lora_attn_modules": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"apply_lora_to_mlp": {
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
"apply_lora_to_output": {
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
"rank": {
|
|
||||||
"type": "integer"
|
|
||||||
},
|
|
||||||
"alpha": {
|
|
||||||
"type": "integer"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"lora_attn_modules",
|
|
||||||
"apply_lora_to_mlp",
|
|
||||||
"apply_lora_to_output",
|
|
||||||
"rank",
|
|
||||||
"alpha"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "object",
|
"$ref": "#/components/schemas/DoraFinetuningConfig"
|
||||||
"properties": {
|
|
||||||
"lora_attn_modules": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"apply_lora_to_mlp": {
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
"apply_lora_to_output": {
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
"rank": {
|
|
||||||
"type": "integer"
|
|
||||||
},
|
|
||||||
"alpha": {
|
|
||||||
"type": "integer"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"lora_attn_modules",
|
|
||||||
"apply_lora_to_mlp",
|
|
||||||
"apply_lora_to_output",
|
|
||||||
"rank",
|
|
||||||
"alpha"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -2896,7 +3110,7 @@
|
||||||
],
|
],
|
||||||
"title": "Request to finetune a model."
|
"title": "Request to finetune a model."
|
||||||
},
|
},
|
||||||
"LoraFinetuningConfig": {
|
"QLoraFinetuningConfig": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"lora_attn_modules": {
|
"lora_attn_modules": {
|
||||||
|
@ -2926,71 +3140,6 @@
|
||||||
"rank",
|
"rank",
|
||||||
"alpha"
|
"alpha"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
"OptimizerConfig": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"optimizer_type": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": [
|
|
||||||
"adam",
|
|
||||||
"adamw",
|
|
||||||
"sgd"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"lr": {
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
"lr_min": {
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
"weight_decay": {
|
|
||||||
"type": "number"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"optimizer_type",
|
|
||||||
"lr",
|
|
||||||
"lr_min",
|
|
||||||
"weight_decay"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"TrainingConfig": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"n_epochs": {
|
|
||||||
"type": "integer"
|
|
||||||
},
|
|
||||||
"batch_size": {
|
|
||||||
"type": "integer"
|
|
||||||
},
|
|
||||||
"shuffle": {
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
"n_iters": {
|
|
||||||
"type": "integer"
|
|
||||||
},
|
|
||||||
"enable_activation_checkpointing": {
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
"memory_efficient_fsdp_wrap": {
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
"fsdp_cpu_offload": {
|
|
||||||
"type": "boolean"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"n_epochs",
|
|
||||||
"batch_size",
|
|
||||||
"shuffle",
|
|
||||||
"n_iters",
|
|
||||||
"enable_activation_checkpointing",
|
|
||||||
"memory_efficient_fsdp_wrap",
|
|
||||||
"fsdp_cpu_offload"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"responses": {}
|
"responses": {}
|
||||||
|
@ -3001,27 +3150,27 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"tags": [
|
"tags": [
|
||||||
{
|
|
||||||
"name": "RewardScoring"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "MemoryBanks"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "SyntheticDataGeneration"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Finetuning"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"name": "AgenticSystem"
|
"name": "AgenticSystem"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "RewardScoring"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "Inference"
|
"name": "Inference"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "SyntheticDataGeneration"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "Datasets"
|
"name": "Datasets"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "PostTraining"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "MemoryBanks"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "ShieldConfig",
|
"name": "ShieldConfig",
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/ShieldConfig\" />"
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/ShieldConfig\" />"
|
||||||
|
@ -3075,16 +3224,16 @@
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryBank\" />"
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryBank\" />"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "FinetuningJobArtifactsResponse",
|
"name": "PostTrainingJobArtifactsResponse",
|
||||||
"description": "Artifacts of a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningJobArtifactsResponse\" />"
|
"description": "Artifacts of a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingJobArtifactsResponse\" />"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "FinetuningJobStatusResponse",
|
"name": "PostTrainingJobStatusResponse",
|
||||||
"description": "Status of a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningJobStatusResponse\" />"
|
"description": "Status of a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingJobStatusResponse\" />"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "FinetuningJobLogStream",
|
"name": "PostTrainingJobLogStream",
|
||||||
"description": "Stream of logs from a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningJobLogStream\" />"
|
"description": "Stream of logs from a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingJobLogStream\" />"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "BatchChatCompletionRequest",
|
"name": "BatchChatCompletionRequest",
|
||||||
|
@ -3138,6 +3287,22 @@
|
||||||
"name": "SyntheticDataGenerationResponse",
|
"name": "SyntheticDataGenerationResponse",
|
||||||
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/SyntheticDataGenerationResponse\" />"
|
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/SyntheticDataGenerationResponse\" />"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "DPOAlignmentConfig",
|
||||||
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/DPOAlignmentConfig\" />"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "OptimizerConfig",
|
||||||
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/OptimizerConfig\" />"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "PostTrainingRLHFRequest",
|
||||||
|
"description": "Request to finetune a model.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingRLHFRequest\" />"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "TrainingConfig",
|
||||||
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/TrainingConfig\" />"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "RewardScoringRequest",
|
"name": "RewardScoringRequest",
|
||||||
"description": "Request to score a reward function. A list of prompts and a list of responses per prompt.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringRequest\" />"
|
"description": "Request to score a reward function. A list of prompts and a list of responses per prompt.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringRequest\" />"
|
||||||
|
@ -3147,20 +3312,20 @@
|
||||||
"description": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringResponse\" />"
|
"description": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringResponse\" />"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "FinetuningTrainRequest",
|
"name": "DoraFinetuningConfig",
|
||||||
"description": "Request to finetune a model.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningTrainRequest\" />"
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/DoraFinetuningConfig\" />"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "LoraFinetuningConfig",
|
"name": "LoraFinetuningConfig",
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/LoraFinetuningConfig\" />"
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/LoraFinetuningConfig\" />"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "OptimizerConfig",
|
"name": "PostTrainingSFTRequest",
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/OptimizerConfig\" />"
|
"description": "Request to finetune a model.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingSFTRequest\" />"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "TrainingConfig",
|
"name": "QLoraFinetuningConfig",
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/TrainingConfig\" />"
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/QLoraFinetuningConfig\" />"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"x-tagGroups": [
|
"x-tagGroups": [
|
||||||
|
@ -3169,9 +3334,9 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"AgenticSystem",
|
"AgenticSystem",
|
||||||
"Datasets",
|
"Datasets",
|
||||||
"Finetuning",
|
|
||||||
"Inference",
|
"Inference",
|
||||||
"MemoryBanks",
|
"MemoryBanks",
|
||||||
|
"PostTraining",
|
||||||
"RewardScoring",
|
"RewardScoring",
|
||||||
"SyntheticDataGeneration"
|
"SyntheticDataGeneration"
|
||||||
]
|
]
|
||||||
|
@ -3195,18 +3360,22 @@
|
||||||
"CompletionResponse",
|
"CompletionResponse",
|
||||||
"CompletionResponseStreamChunk",
|
"CompletionResponseStreamChunk",
|
||||||
"CreateDatasetRequest",
|
"CreateDatasetRequest",
|
||||||
|
"DPOAlignmentConfig",
|
||||||
"Dataset",
|
"Dataset",
|
||||||
"Dialog",
|
"Dialog",
|
||||||
"FinetuningJobArtifactsResponse",
|
"DoraFinetuningConfig",
|
||||||
"FinetuningJobLogStream",
|
|
||||||
"FinetuningJobStatusResponse",
|
|
||||||
"FinetuningTrainRequest",
|
|
||||||
"KScoredPromptGenerations",
|
"KScoredPromptGenerations",
|
||||||
"LoraFinetuningConfig",
|
"LoraFinetuningConfig",
|
||||||
"MemoryBank",
|
"MemoryBank",
|
||||||
"Message",
|
"Message",
|
||||||
"MessageScore",
|
"MessageScore",
|
||||||
"OptimizerConfig",
|
"OptimizerConfig",
|
||||||
|
"PostTrainingJobArtifactsResponse",
|
||||||
|
"PostTrainingJobLogStream",
|
||||||
|
"PostTrainingJobStatusResponse",
|
||||||
|
"PostTrainingRLHFRequest",
|
||||||
|
"PostTrainingSFTRequest",
|
||||||
|
"QLoraFinetuningConfig",
|
||||||
"RewardScoringRequest",
|
"RewardScoringRequest",
|
||||||
"RewardScoringResponse",
|
"RewardScoringResponse",
|
||||||
"ShieldConfig",
|
"ShieldConfig",
|
||||||
|
|
|
@ -879,6 +879,23 @@ components:
|
||||||
- dataset
|
- dataset
|
||||||
title: Request to create a dataset.
|
title: Request to create a dataset.
|
||||||
type: object
|
type: object
|
||||||
|
DPOAlignmentConfig:
|
||||||
|
additionalProperties: false
|
||||||
|
properties:
|
||||||
|
epsilon:
|
||||||
|
type: number
|
||||||
|
gamma:
|
||||||
|
type: number
|
||||||
|
reward_clip:
|
||||||
|
type: number
|
||||||
|
reward_scale:
|
||||||
|
type: number
|
||||||
|
required:
|
||||||
|
- reward_scale
|
||||||
|
- reward_clip
|
||||||
|
- epsilon
|
||||||
|
- gamma
|
||||||
|
type: object
|
||||||
Dataset:
|
Dataset:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
properties:
|
properties:
|
||||||
|
@ -923,195 +940,27 @@ components:
|
||||||
- message
|
- message
|
||||||
- message_history
|
- message_history
|
||||||
type: object
|
type: object
|
||||||
FinetuningJobArtifactsResponse:
|
DoraFinetuningConfig:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
properties:
|
properties:
|
||||||
checkpoints:
|
alpha:
|
||||||
items:
|
type: integer
|
||||||
additionalProperties: false
|
apply_lora_to_mlp:
|
||||||
properties:
|
type: boolean
|
||||||
iters:
|
apply_lora_to_output:
|
||||||
type: integer
|
type: boolean
|
||||||
path:
|
lora_attn_modules:
|
||||||
$ref: '#/components/schemas/URL'
|
|
||||||
required:
|
|
||||||
- iters
|
|
||||||
- path
|
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
job_uuid:
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- job_uuid
|
|
||||||
- checkpoints
|
|
||||||
title: Artifacts of a finetuning job.
|
|
||||||
type: object
|
|
||||||
FinetuningJobLogStream:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
job_uuid:
|
|
||||||
type: string
|
|
||||||
log_lines:
|
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
type: array
|
type: array
|
||||||
|
rank:
|
||||||
|
type: integer
|
||||||
required:
|
required:
|
||||||
- job_uuid
|
- lora_attn_modules
|
||||||
- log_lines
|
- apply_lora_to_mlp
|
||||||
title: Stream of logs from a finetuning job.
|
- apply_lora_to_output
|
||||||
type: object
|
- rank
|
||||||
FinetuningJobStatusResponse:
|
- alpha
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
checkpoints:
|
|
||||||
items:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
iters:
|
|
||||||
type: integer
|
|
||||||
path:
|
|
||||||
$ref: '#/components/schemas/URL'
|
|
||||||
required:
|
|
||||||
- iters
|
|
||||||
- path
|
|
||||||
type: object
|
|
||||||
type: array
|
|
||||||
completed_at:
|
|
||||||
format: date-time
|
|
||||||
type: string
|
|
||||||
job_uuid:
|
|
||||||
type: string
|
|
||||||
resources_allocated:
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
type: object
|
|
||||||
scheduled_at:
|
|
||||||
format: date-time
|
|
||||||
type: string
|
|
||||||
started_at:
|
|
||||||
format: date-time
|
|
||||||
type: string
|
|
||||||
status:
|
|
||||||
enum:
|
|
||||||
- running
|
|
||||||
- completed
|
|
||||||
- failed
|
|
||||||
- scheduled
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- job_uuid
|
|
||||||
- status
|
|
||||||
- checkpoints
|
|
||||||
title: Status of a finetuning job.
|
|
||||||
type: object
|
|
||||||
FinetuningTrainRequest:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
algorithm:
|
|
||||||
enum:
|
|
||||||
- full
|
|
||||||
- lora
|
|
||||||
- qlora
|
|
||||||
- dora
|
|
||||||
type: string
|
|
||||||
algorithm_config:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/LoraFinetuningConfig'
|
|
||||||
- additionalProperties: false
|
|
||||||
properties:
|
|
||||||
alpha:
|
|
||||||
type: integer
|
|
||||||
apply_lora_to_mlp:
|
|
||||||
type: boolean
|
|
||||||
apply_lora_to_output:
|
|
||||||
type: boolean
|
|
||||||
lora_attn_modules:
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
rank:
|
|
||||||
type: integer
|
|
||||||
required:
|
|
||||||
- lora_attn_modules
|
|
||||||
- apply_lora_to_mlp
|
|
||||||
- apply_lora_to_output
|
|
||||||
- rank
|
|
||||||
- alpha
|
|
||||||
type: object
|
|
||||||
- additionalProperties: false
|
|
||||||
properties:
|
|
||||||
alpha:
|
|
||||||
type: integer
|
|
||||||
apply_lora_to_mlp:
|
|
||||||
type: boolean
|
|
||||||
apply_lora_to_output:
|
|
||||||
type: boolean
|
|
||||||
lora_attn_modules:
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
rank:
|
|
||||||
type: integer
|
|
||||||
required:
|
|
||||||
- lora_attn_modules
|
|
||||||
- apply_lora_to_mlp
|
|
||||||
- apply_lora_to_output
|
|
||||||
- rank
|
|
||||||
- alpha
|
|
||||||
type: object
|
|
||||||
dataset:
|
|
||||||
$ref: '#/components/schemas/Dataset'
|
|
||||||
hyperparam_search_config:
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
type: object
|
|
||||||
job_uuid:
|
|
||||||
type: string
|
|
||||||
logger_config:
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
type: object
|
|
||||||
model:
|
|
||||||
enum:
|
|
||||||
- llama3_8b
|
|
||||||
- llama3_70b
|
|
||||||
type: string
|
|
||||||
optimizer_config:
|
|
||||||
$ref: '#/components/schemas/OptimizerConfig'
|
|
||||||
training_config:
|
|
||||||
$ref: '#/components/schemas/TrainingConfig'
|
|
||||||
validation_dataset:
|
|
||||||
$ref: '#/components/schemas/Dataset'
|
|
||||||
required:
|
|
||||||
- job_uuid
|
|
||||||
- model
|
|
||||||
- dataset
|
|
||||||
- validation_dataset
|
|
||||||
- algorithm
|
|
||||||
- algorithm_config
|
|
||||||
- optimizer_config
|
|
||||||
- training_config
|
|
||||||
- hyperparam_search_config
|
|
||||||
- logger_config
|
|
||||||
title: Request to finetune a model.
|
|
||||||
type: object
|
type: object
|
||||||
KScoredPromptGenerations:
|
KScoredPromptGenerations:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
|
@ -1259,6 +1108,232 @@ components:
|
||||||
- lr_min
|
- lr_min
|
||||||
- weight_decay
|
- weight_decay
|
||||||
type: object
|
type: object
|
||||||
|
PostTrainingJobArtifactsResponse:
|
||||||
|
additionalProperties: false
|
||||||
|
properties:
|
||||||
|
checkpoints:
|
||||||
|
items:
|
||||||
|
additionalProperties: false
|
||||||
|
properties:
|
||||||
|
iters:
|
||||||
|
type: integer
|
||||||
|
path:
|
||||||
|
$ref: '#/components/schemas/URL'
|
||||||
|
required:
|
||||||
|
- iters
|
||||||
|
- path
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
job_uuid:
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- job_uuid
|
||||||
|
- checkpoints
|
||||||
|
title: Artifacts of a finetuning job.
|
||||||
|
type: object
|
||||||
|
PostTrainingJobLogStream:
|
||||||
|
additionalProperties: false
|
||||||
|
properties:
|
||||||
|
job_uuid:
|
||||||
|
type: string
|
||||||
|
log_lines:
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
required:
|
||||||
|
- job_uuid
|
||||||
|
- log_lines
|
||||||
|
title: Stream of logs from a finetuning job.
|
||||||
|
type: object
|
||||||
|
PostTrainingJobStatusResponse:
|
||||||
|
additionalProperties: false
|
||||||
|
properties:
|
||||||
|
checkpoints:
|
||||||
|
items:
|
||||||
|
additionalProperties: false
|
||||||
|
properties:
|
||||||
|
iters:
|
||||||
|
type: integer
|
||||||
|
path:
|
||||||
|
$ref: '#/components/schemas/URL'
|
||||||
|
required:
|
||||||
|
- iters
|
||||||
|
- path
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
|
completed_at:
|
||||||
|
format: date-time
|
||||||
|
type: string
|
||||||
|
job_uuid:
|
||||||
|
type: string
|
||||||
|
resources_allocated:
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
type: object
|
||||||
|
scheduled_at:
|
||||||
|
format: date-time
|
||||||
|
type: string
|
||||||
|
started_at:
|
||||||
|
format: date-time
|
||||||
|
type: string
|
||||||
|
status:
|
||||||
|
enum:
|
||||||
|
- running
|
||||||
|
- completed
|
||||||
|
- failed
|
||||||
|
- scheduled
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- job_uuid
|
||||||
|
- status
|
||||||
|
- checkpoints
|
||||||
|
title: Status of a finetuning job.
|
||||||
|
type: object
|
||||||
|
PostTrainingRLHFRequest:
|
||||||
|
additionalProperties: false
|
||||||
|
properties:
|
||||||
|
algorithm:
|
||||||
|
enum:
|
||||||
|
- dpo
|
||||||
|
type: string
|
||||||
|
algorithm_config:
|
||||||
|
$ref: '#/components/schemas/DPOAlignmentConfig'
|
||||||
|
dataset:
|
||||||
|
$ref: '#/components/schemas/Dataset'
|
||||||
|
finetuned_model:
|
||||||
|
$ref: '#/components/schemas/URL'
|
||||||
|
hyperparam_search_config:
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
type: object
|
||||||
|
job_uuid:
|
||||||
|
type: string
|
||||||
|
logger_config:
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
type: object
|
||||||
|
optimizer_config:
|
||||||
|
$ref: '#/components/schemas/OptimizerConfig'
|
||||||
|
training_config:
|
||||||
|
$ref: '#/components/schemas/TrainingConfig'
|
||||||
|
validation_dataset:
|
||||||
|
$ref: '#/components/schemas/Dataset'
|
||||||
|
required:
|
||||||
|
- job_uuid
|
||||||
|
- finetuned_model
|
||||||
|
- dataset
|
||||||
|
- validation_dataset
|
||||||
|
- algorithm
|
||||||
|
- algorithm_config
|
||||||
|
- optimizer_config
|
||||||
|
- training_config
|
||||||
|
- hyperparam_search_config
|
||||||
|
- logger_config
|
||||||
|
title: Request to finetune a model.
|
||||||
|
type: object
|
||||||
|
PostTrainingSFTRequest:
|
||||||
|
additionalProperties: false
|
||||||
|
properties:
|
||||||
|
algorithm:
|
||||||
|
enum:
|
||||||
|
- full
|
||||||
|
- lora
|
||||||
|
- qlora
|
||||||
|
- dora
|
||||||
|
type: string
|
||||||
|
algorithm_config:
|
||||||
|
oneOf:
|
||||||
|
- $ref: '#/components/schemas/LoraFinetuningConfig'
|
||||||
|
- $ref: '#/components/schemas/QLoraFinetuningConfig'
|
||||||
|
- $ref: '#/components/schemas/DoraFinetuningConfig'
|
||||||
|
dataset:
|
||||||
|
$ref: '#/components/schemas/Dataset'
|
||||||
|
hyperparam_search_config:
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
type: object
|
||||||
|
job_uuid:
|
||||||
|
type: string
|
||||||
|
logger_config:
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
type: object
|
||||||
|
model:
|
||||||
|
enum:
|
||||||
|
- llama3_8b
|
||||||
|
- llama3_70b
|
||||||
|
type: string
|
||||||
|
optimizer_config:
|
||||||
|
$ref: '#/components/schemas/OptimizerConfig'
|
||||||
|
training_config:
|
||||||
|
$ref: '#/components/schemas/TrainingConfig'
|
||||||
|
validation_dataset:
|
||||||
|
$ref: '#/components/schemas/Dataset'
|
||||||
|
required:
|
||||||
|
- job_uuid
|
||||||
|
- model
|
||||||
|
- dataset
|
||||||
|
- validation_dataset
|
||||||
|
- algorithm
|
||||||
|
- algorithm_config
|
||||||
|
- optimizer_config
|
||||||
|
- training_config
|
||||||
|
- hyperparam_search_config
|
||||||
|
- logger_config
|
||||||
|
title: Request to finetune a model.
|
||||||
|
type: object
|
||||||
|
QLoraFinetuningConfig:
|
||||||
|
additionalProperties: false
|
||||||
|
properties:
|
||||||
|
alpha:
|
||||||
|
type: integer
|
||||||
|
apply_lora_to_mlp:
|
||||||
|
type: boolean
|
||||||
|
apply_lora_to_output:
|
||||||
|
type: boolean
|
||||||
|
lora_attn_modules:
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
|
rank:
|
||||||
|
type: integer
|
||||||
|
required:
|
||||||
|
- lora_attn_modules
|
||||||
|
- apply_lora_to_mlp
|
||||||
|
- apply_lora_to_output
|
||||||
|
- rank
|
||||||
|
- alpha
|
||||||
|
type: object
|
||||||
RewardScoringRequest:
|
RewardScoringRequest:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
properties:
|
properties:
|
||||||
|
@ -1581,71 +1656,6 @@ paths:
|
||||||
description: OK
|
description: OK
|
||||||
tags:
|
tags:
|
||||||
- Datasets
|
- Datasets
|
||||||
/finetuning/job/artifacts:
|
|
||||||
get:
|
|
||||||
parameters:
|
|
||||||
- in: query
|
|
||||||
name: job_uuid
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/FinetuningJobArtifactsResponse'
|
|
||||||
description: OK
|
|
||||||
tags:
|
|
||||||
- Finetuning
|
|
||||||
/finetuning/job/logs:
|
|
||||||
get:
|
|
||||||
parameters:
|
|
||||||
- in: query
|
|
||||||
name: job_uuid
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/FinetuningJobLogStream'
|
|
||||||
description: OK
|
|
||||||
tags:
|
|
||||||
- Finetuning
|
|
||||||
/finetuning/job/status:
|
|
||||||
get:
|
|
||||||
parameters:
|
|
||||||
- in: query
|
|
||||||
name: job_uuid
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/FinetuningJobStatusResponse'
|
|
||||||
description: OK
|
|
||||||
tags:
|
|
||||||
- Finetuning
|
|
||||||
/finetuning/text_generation/train:
|
|
||||||
post:
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/FinetuningTrainRequest'
|
|
||||||
required: true
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
tags:
|
|
||||||
- Finetuning
|
|
||||||
/memory_banks/create:
|
/memory_banks/create:
|
||||||
post:
|
post:
|
||||||
parameters:
|
parameters:
|
||||||
|
@ -1787,6 +1797,85 @@ paths:
|
||||||
description: OK
|
description: OK
|
||||||
tags:
|
tags:
|
||||||
- MemoryBanks
|
- MemoryBanks
|
||||||
|
/post_training/job/artifacts:
|
||||||
|
get:
|
||||||
|
parameters:
|
||||||
|
- in: query
|
||||||
|
name: job_uuid
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
|
||||||
|
description: OK
|
||||||
|
tags:
|
||||||
|
- PostTraining
|
||||||
|
/post_training/job/logs:
|
||||||
|
get:
|
||||||
|
parameters:
|
||||||
|
- in: query
|
||||||
|
name: job_uuid
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/PostTrainingJobLogStream'
|
||||||
|
description: OK
|
||||||
|
tags:
|
||||||
|
- PostTraining
|
||||||
|
/post_training/job/status:
|
||||||
|
get:
|
||||||
|
parameters:
|
||||||
|
- in: query
|
||||||
|
name: job_uuid
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/PostTrainingJobStatusResponse'
|
||||||
|
description: OK
|
||||||
|
tags:
|
||||||
|
- PostTraining
|
||||||
|
/post_training/preference_optimize/:
|
||||||
|
post:
|
||||||
|
parameters: []
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/PostTrainingRLHFRequest'
|
||||||
|
required: true
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: OK
|
||||||
|
tags:
|
||||||
|
- PostTraining
|
||||||
|
/post_training/supervised_fine_tune/:
|
||||||
|
post:
|
||||||
|
parameters: []
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/PostTrainingSFTRequest'
|
||||||
|
required: true
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: OK
|
||||||
|
tags:
|
||||||
|
- PostTraining
|
||||||
/reward_scoring/score:
|
/reward_scoring/score:
|
||||||
post:
|
post:
|
||||||
parameters: []
|
parameters: []
|
||||||
|
@ -1828,13 +1917,13 @@ security:
|
||||||
servers:
|
servers:
|
||||||
- url: http://llama.meta.com
|
- url: http://llama.meta.com
|
||||||
tags:
|
tags:
|
||||||
- name: RewardScoring
|
|
||||||
- name: MemoryBanks
|
|
||||||
- name: SyntheticDataGeneration
|
|
||||||
- name: Finetuning
|
|
||||||
- name: AgenticSystem
|
- name: AgenticSystem
|
||||||
|
- name: RewardScoring
|
||||||
- name: Inference
|
- name: Inference
|
||||||
|
- name: SyntheticDataGeneration
|
||||||
- name: Datasets
|
- name: Datasets
|
||||||
|
- name: PostTraining
|
||||||
|
- name: MemoryBanks
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
|
||||||
name: ShieldConfig
|
name: ShieldConfig
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
|
||||||
|
@ -1888,20 +1977,20 @@ tags:
|
||||||
- description: 'Artifacts of a finetuning job.
|
- description: 'Artifacts of a finetuning job.
|
||||||
|
|
||||||
|
|
||||||
<SchemaDefinition schemaRef="#/components/schemas/FinetuningJobArtifactsResponse"
|
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingJobArtifactsResponse"
|
||||||
/>'
|
/>'
|
||||||
name: FinetuningJobArtifactsResponse
|
name: PostTrainingJobArtifactsResponse
|
||||||
- description: 'Status of a finetuning job.
|
- description: 'Status of a finetuning job.
|
||||||
|
|
||||||
|
|
||||||
<SchemaDefinition schemaRef="#/components/schemas/FinetuningJobStatusResponse"
|
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingJobStatusResponse"
|
||||||
/>'
|
/>'
|
||||||
name: FinetuningJobStatusResponse
|
name: PostTrainingJobStatusResponse
|
||||||
- description: 'Stream of logs from a finetuning job.
|
- description: 'Stream of logs from a finetuning job.
|
||||||
|
|
||||||
|
|
||||||
<SchemaDefinition schemaRef="#/components/schemas/FinetuningJobLogStream" />'
|
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingJobLogStream" />'
|
||||||
name: FinetuningJobLogStream
|
name: PostTrainingJobLogStream
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
|
||||||
/>
|
/>
|
||||||
name: BatchChatCompletionRequest
|
name: BatchChatCompletionRequest
|
||||||
|
@ -1961,6 +2050,19 @@ tags:
|
||||||
<SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationResponse"
|
<SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationResponse"
|
||||||
/>'
|
/>'
|
||||||
name: SyntheticDataGenerationResponse
|
name: SyntheticDataGenerationResponse
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/DPOAlignmentConfig"
|
||||||
|
/>
|
||||||
|
name: DPOAlignmentConfig
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/OptimizerConfig"
|
||||||
|
/>
|
||||||
|
name: OptimizerConfig
|
||||||
|
- description: 'Request to finetune a model.
|
||||||
|
|
||||||
|
|
||||||
|
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingRLHFRequest" />'
|
||||||
|
name: PostTrainingRLHFRequest
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/TrainingConfig" />
|
||||||
|
name: TrainingConfig
|
||||||
- description: 'Request to score a reward function. A list of prompts and a list of
|
- description: 'Request to score a reward function. A list of prompts and a list of
|
||||||
responses per prompt.
|
responses per prompt.
|
||||||
|
|
||||||
|
@ -1973,27 +2075,28 @@ tags:
|
||||||
|
|
||||||
<SchemaDefinition schemaRef="#/components/schemas/RewardScoringResponse" />'
|
<SchemaDefinition schemaRef="#/components/schemas/RewardScoringResponse" />'
|
||||||
name: RewardScoringResponse
|
name: RewardScoringResponse
|
||||||
- description: 'Request to finetune a model.
|
- description: <SchemaDefinition schemaRef="#/components/schemas/DoraFinetuningConfig"
|
||||||
|
/>
|
||||||
|
name: DoraFinetuningConfig
|
||||||
<SchemaDefinition schemaRef="#/components/schemas/FinetuningTrainRequest" />'
|
|
||||||
name: FinetuningTrainRequest
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/LoraFinetuningConfig"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/LoraFinetuningConfig"
|
||||||
/>
|
/>
|
||||||
name: LoraFinetuningConfig
|
name: LoraFinetuningConfig
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/OptimizerConfig"
|
- description: 'Request to finetune a model.
|
||||||
|
|
||||||
|
|
||||||
|
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingSFTRequest" />'
|
||||||
|
name: PostTrainingSFTRequest
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/QLoraFinetuningConfig"
|
||||||
/>
|
/>
|
||||||
name: OptimizerConfig
|
name: QLoraFinetuningConfig
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/TrainingConfig" />
|
|
||||||
name: TrainingConfig
|
|
||||||
x-tagGroups:
|
x-tagGroups:
|
||||||
- name: Operations
|
- name: Operations
|
||||||
tags:
|
tags:
|
||||||
- AgenticSystem
|
- AgenticSystem
|
||||||
- Datasets
|
- Datasets
|
||||||
- Finetuning
|
|
||||||
- Inference
|
- Inference
|
||||||
- MemoryBanks
|
- MemoryBanks
|
||||||
|
- PostTraining
|
||||||
- RewardScoring
|
- RewardScoring
|
||||||
- SyntheticDataGeneration
|
- SyntheticDataGeneration
|
||||||
- name: Types
|
- name: Types
|
||||||
|
@ -2014,18 +2117,22 @@ x-tagGroups:
|
||||||
- CompletionResponse
|
- CompletionResponse
|
||||||
- CompletionResponseStreamChunk
|
- CompletionResponseStreamChunk
|
||||||
- CreateDatasetRequest
|
- CreateDatasetRequest
|
||||||
|
- DPOAlignmentConfig
|
||||||
- Dataset
|
- Dataset
|
||||||
- Dialog
|
- Dialog
|
||||||
- FinetuningJobArtifactsResponse
|
- DoraFinetuningConfig
|
||||||
- FinetuningJobLogStream
|
|
||||||
- FinetuningJobStatusResponse
|
|
||||||
- FinetuningTrainRequest
|
|
||||||
- KScoredPromptGenerations
|
- KScoredPromptGenerations
|
||||||
- LoraFinetuningConfig
|
- LoraFinetuningConfig
|
||||||
- MemoryBank
|
- MemoryBank
|
||||||
- Message
|
- Message
|
||||||
- MessageScore
|
- MessageScore
|
||||||
- OptimizerConfig
|
- OptimizerConfig
|
||||||
|
- PostTrainingJobArtifactsResponse
|
||||||
|
- PostTrainingJobLogStream
|
||||||
|
- PostTrainingJobStatusResponse
|
||||||
|
- PostTrainingRLHFRequest
|
||||||
|
- PostTrainingSFTRequest
|
||||||
|
- QLoraFinetuningConfig
|
||||||
- RewardScoringRequest
|
- RewardScoringRequest
|
||||||
- RewardScoringResponse
|
- RewardScoringResponse
|
||||||
- ShieldConfig
|
- ShieldConfig
|
||||||
|
|
|
@ -72,11 +72,13 @@ class LoraFinetuningConfig:
|
||||||
alpha: int
|
alpha: int
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
|
||||||
@dataclass
|
@dataclass
|
||||||
class QLoraFinetuningConfig(LoraFinetuningConfig):
|
class QLoraFinetuningConfig(LoraFinetuningConfig):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
|
||||||
@dataclass
|
@dataclass
|
||||||
class DoraFinetuningConfig(LoraFinetuningConfig):
|
class DoraFinetuningConfig(LoraFinetuningConfig):
|
||||||
pass
|
pass
|
||||||
|
@ -84,14 +86,14 @@ class DoraFinetuningConfig(LoraFinetuningConfig):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
@dataclass
|
@dataclass
|
||||||
class FinetuningJobLogStream:
|
class PostTrainingJobLogStream:
|
||||||
"""Stream of logs from a finetuning job."""
|
"""Stream of logs from a finetuning job."""
|
||||||
|
|
||||||
job_uuid: str
|
job_uuid: str
|
||||||
log_lines: List[str]
|
log_lines: List[str]
|
||||||
|
|
||||||
|
|
||||||
class FinetuningJobStatus(Enum):
|
class PostTrainingJobStatus(Enum):
|
||||||
running = "running"
|
running = "running"
|
||||||
completed = "completed"
|
completed = "completed"
|
||||||
failed = "failed"
|
failed = "failed"
|
||||||
|
@ -102,3 +104,16 @@ class FinetuningJobStatus(Enum):
|
||||||
class Checkpoint:
|
class Checkpoint:
|
||||||
iters: int
|
iters: int
|
||||||
path: URL
|
path: URL
|
||||||
|
|
||||||
|
|
||||||
|
class RLHFAlgorithm(Enum):
|
||||||
|
dpo = "dpo"
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
|
||||||
|
@dataclass
|
||||||
|
class DPOAlignmentConfig:
|
||||||
|
reward_scale: float
|
||||||
|
reward_clip: float
|
||||||
|
epsilon: float
|
||||||
|
gamma: float
|
Loading…
Add table
Add a link
Reference in a new issue