added DPO

This commit is contained in:
Ashwin Bharambe 2024-07-11 00:01:58 -07:00
parent 7cade3acc3
commit 631328f556
4 changed files with 796 additions and 472 deletions

View file

@ -12,19 +12,6 @@ from agentic_system_types import (
SafetyViolation, SafetyViolation,
) )
from finetuning_types import (
Checkpoint,
Dataset,
DoraFinetuningConfig,
FinetuningAlgorithm,
FinetuningJobLogStream,
FinetuningJobStatus,
LoraFinetuningConfig,
OptimizerConfig,
QLoraFinetuningConfig,
TrainingConfig,
)
from model_types import ( from model_types import (
BuiltinTool, BuiltinTool,
Content, Content,
@ -42,6 +29,21 @@ from model_types import (
URL, URL,
) )
from post_training_types import (
Checkpoint,
Dataset,
DoraFinetuningConfig,
DPOAlignmentConfig,
FinetuningAlgorithm,
LoraFinetuningConfig,
OptimizerConfig,
PostTrainingJobLogStream,
PostTrainingJobStatus,
QLoraFinetuningConfig,
RLHFAlgorithm,
TrainingConfig,
)
from pyopenapi import Info, Options, Server, Specification, webmethod from pyopenapi import Info, Options, Server, Specification, webmethod
from strong_typing.schema import json_schema_type from strong_typing.schema import json_schema_type
@ -408,7 +410,7 @@ class Datasets(Protocol):
@json_schema_type @json_schema_type
@dataclass @dataclass
class FinetuningTrainRequest: class PostTrainingSFTRequest:
"""Request to finetune a model.""" """Request to finetune a model."""
job_uuid: str job_uuid: str
@ -432,11 +434,34 @@ class FinetuningTrainRequest:
@json_schema_type @json_schema_type
@dataclass @dataclass
class FinetuningJobStatusResponse: class PostTrainingRLHFRequest:
"""Request to finetune a model."""
job_uuid: str
finetuned_model: URL
dataset: Dataset
validation_dataset: Dataset
algorithm: RLHFAlgorithm
algorithm_config: Union[DPOAlignmentConfig]
optimizer_config: OptimizerConfig
training_config: TrainingConfig
# TODO: define these
hyperparam_search_config: Dict[str, Any]
logger_config: Dict[str, Any]
@json_schema_type
@dataclass
class PostTrainingJobStatusResponse:
"""Status of a finetuning job.""" """Status of a finetuning job."""
job_uuid: str job_uuid: str
status: FinetuningJobStatus status: PostTrainingJobStatus
scheduled_at: Optional[datetime] = None scheduled_at: Optional[datetime] = None
started_at: Optional[datetime] = None started_at: Optional[datetime] = None
@ -449,7 +474,7 @@ class FinetuningJobStatusResponse:
@json_schema_type @json_schema_type
@dataclass @dataclass
class FinetuningJobArtifactsResponse: class PostTrainingJobArtifactsResponse:
"""Artifacts of a finetuning job.""" """Artifacts of a finetuning job."""
job_uuid: str job_uuid: str
@ -458,27 +483,35 @@ class FinetuningJobArtifactsResponse:
# TODO(ashwin): metrics, evals # TODO(ashwin): metrics, evals
class Finetuning(Protocol): class PostTraining(Protocol):
@webmethod(route="/finetuning/text_generation/train") @webmethod(route="/post_training/supervised_fine_tune/")
def post_train( def post_supervised_fine_tune(
self, self,
request: FinetuningTrainRequest, request: PostTrainingSFTRequest,
) -> None: ...
@webmethod(route="/post_training/preference_optimize/")
def post_preference_optimize(
self,
request: PostTrainingRLHFRequest,
) -> None: ... ) -> None: ...
# sends SSE stream of logs # sends SSE stream of logs
@webmethod(route="/finetuning/job/logs") @webmethod(route="/post_training/job/logs")
def get_training_log_stream(self, job_uuid: str) -> FinetuningJobLogStream: ... def get_training_log_stream(self, job_uuid: str) -> PostTrainingJobLogStream: ...
@webmethod(route="/finetuning/job/status") @webmethod(route="/post_training/job/status")
def get_training_job_status(self, job_uuid: str) -> FinetuningJobStatusResponse: ... def get_training_job_status(
self, job_uuid: str
) -> PostTrainingJobStatusResponse: ...
@webmethod(route="/finetuning/job/cancel") @webmethod(route="/post_training/job/cancel")
def cancel_training_job(self, job_uuid: str) -> None: ... def cancel_training_job(self, job_uuid: str) -> None: ...
@webmethod(route="/finetuning/job/artifacts") @webmethod(route="/post_training/job/artifacts")
def get_training_job_artifacts( def get_training_job_artifacts(
self, job_uuid: str self, job_uuid: str
) -> FinetuningJobArtifactsResponse: ... ) -> PostTrainingJobArtifactsResponse: ...
class LlamaStackEndpoints( class LlamaStackEndpoints(
@ -487,7 +520,7 @@ class LlamaStackEndpoints(
RewardScoring, RewardScoring,
SyntheticDataGeneration, SyntheticDataGeneration,
Datasets, Datasets,
Finetuning, PostTraining,
MemoryBanks, MemoryBanks,
): ... ): ...

View file

@ -299,7 +299,7 @@
"parameters": [] "parameters": []
} }
}, },
"/finetuning/job/artifacts": { "/post_training/job/artifacts": {
"get": { "get": {
"responses": { "responses": {
"200": { "200": {
@ -307,14 +307,14 @@
"content": { "content": {
"application/json": { "application/json": {
"schema": { "schema": {
"$ref": "#/components/schemas/FinetuningJobArtifactsResponse" "$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
} }
} }
} }
} }
}, },
"tags": [ "tags": [
"Finetuning" "PostTraining"
], ],
"parameters": [ "parameters": [
{ {
@ -328,7 +328,7 @@
] ]
} }
}, },
"/finetuning/job/status": { "/post_training/job/status": {
"get": { "get": {
"responses": { "responses": {
"200": { "200": {
@ -336,14 +336,14 @@
"content": { "content": {
"application/json": { "application/json": {
"schema": { "schema": {
"$ref": "#/components/schemas/FinetuningJobStatusResponse" "$ref": "#/components/schemas/PostTrainingJobStatusResponse"
} }
} }
} }
} }
}, },
"tags": [ "tags": [
"Finetuning" "PostTraining"
], ],
"parameters": [ "parameters": [
{ {
@ -357,7 +357,7 @@
] ]
} }
}, },
"/finetuning/job/logs": { "/post_training/job/logs": {
"get": { "get": {
"responses": { "responses": {
"200": { "200": {
@ -365,14 +365,14 @@
"content": { "content": {
"application/json": { "application/json": {
"schema": { "schema": {
"$ref": "#/components/schemas/FinetuningJobLogStream" "$ref": "#/components/schemas/PostTrainingJobLogStream"
} }
} }
} }
} }
}, },
"tags": [ "tags": [
"Finetuning" "PostTraining"
], ],
"parameters": [ "parameters": [
{ {
@ -664,6 +664,29 @@
} }
} }
}, },
"/post_training/preference_optimize/": {
"post": {
"responses": {
"200": {
"description": "OK"
}
},
"tags": [
"PostTraining"
],
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/PostTrainingRLHFRequest"
}
}
},
"required": true
}
}
},
"/reward_scoring/score": { "/reward_scoring/score": {
"post": { "post": {
"responses": { "responses": {
@ -694,7 +717,7 @@
} }
} }
}, },
"/finetuning/text_generation/train": { "/post_training/supervised_fine_tune/": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -702,14 +725,14 @@
} }
}, },
"tags": [ "tags": [
"Finetuning" "PostTraining"
], ],
"parameters": [], "parameters": [],
"requestBody": { "requestBody": {
"content": { "content": {
"application/json": { "application/json": {
"schema": { "schema": {
"$ref": "#/components/schemas/FinetuningTrainRequest" "$ref": "#/components/schemas/PostTrainingSFTRequest"
} }
} }
}, },
@ -1697,7 +1720,7 @@
"name" "name"
] ]
}, },
"FinetuningJobArtifactsResponse": { "PostTrainingJobArtifactsResponse": {
"type": "object", "type": "object",
"properties": { "properties": {
"job_uuid": { "job_uuid": {
@ -1730,7 +1753,7 @@
], ],
"title": "Artifacts of a finetuning job." "title": "Artifacts of a finetuning job."
}, },
"FinetuningJobStatusResponse": { "PostTrainingJobStatusResponse": {
"type": "object", "type": "object",
"properties": { "properties": {
"job_uuid": { "job_uuid": {
@ -1810,7 +1833,7 @@
], ],
"title": "Status of a finetuning job." "title": "Status of a finetuning job."
}, },
"FinetuningJobLogStream": { "PostTrainingJobLogStream": {
"type": "object", "type": "object",
"properties": { "properties": {
"job_uuid": { "job_uuid": {
@ -2672,6 +2695,191 @@
], ],
"title": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold." "title": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
}, },
"DPOAlignmentConfig": {
"type": "object",
"properties": {
"reward_scale": {
"type": "number"
},
"reward_clip": {
"type": "number"
},
"epsilon": {
"type": "number"
},
"gamma": {
"type": "number"
}
},
"additionalProperties": false,
"required": [
"reward_scale",
"reward_clip",
"epsilon",
"gamma"
]
},
"OptimizerConfig": {
"type": "object",
"properties": {
"optimizer_type": {
"type": "string",
"enum": [
"adam",
"adamw",
"sgd"
]
},
"lr": {
"type": "number"
},
"lr_min": {
"type": "number"
},
"weight_decay": {
"type": "number"
}
},
"additionalProperties": false,
"required": [
"optimizer_type",
"lr",
"lr_min",
"weight_decay"
]
},
"PostTrainingRLHFRequest": {
"type": "object",
"properties": {
"job_uuid": {
"type": "string"
},
"finetuned_model": {
"$ref": "#/components/schemas/URL"
},
"dataset": {
"$ref": "#/components/schemas/Dataset"
},
"validation_dataset": {
"$ref": "#/components/schemas/Dataset"
},
"algorithm": {
"type": "string",
"enum": [
"dpo"
]
},
"algorithm_config": {
"$ref": "#/components/schemas/DPOAlignmentConfig"
},
"optimizer_config": {
"$ref": "#/components/schemas/OptimizerConfig"
},
"training_config": {
"$ref": "#/components/schemas/TrainingConfig"
},
"hyperparam_search_config": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"logger_config": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"job_uuid",
"finetuned_model",
"dataset",
"validation_dataset",
"algorithm",
"algorithm_config",
"optimizer_config",
"training_config",
"hyperparam_search_config",
"logger_config"
],
"title": "Request to finetune a model."
},
"TrainingConfig": {
"type": "object",
"properties": {
"n_epochs": {
"type": "integer"
},
"batch_size": {
"type": "integer"
},
"shuffle": {
"type": "boolean"
},
"n_iters": {
"type": "integer"
},
"enable_activation_checkpointing": {
"type": "boolean"
},
"memory_efficient_fsdp_wrap": {
"type": "boolean"
},
"fsdp_cpu_offload": {
"type": "boolean"
}
},
"additionalProperties": false,
"required": [
"n_epochs",
"batch_size",
"shuffle",
"n_iters",
"enable_activation_checkpointing",
"memory_efficient_fsdp_wrap",
"fsdp_cpu_offload"
]
},
"RewardScoringRequest": { "RewardScoringRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -2727,7 +2935,69 @@
], ],
"title": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold." "title": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold."
}, },
"FinetuningTrainRequest": { "DoraFinetuningConfig": {
"type": "object",
"properties": {
"lora_attn_modules": {
"type": "array",
"items": {
"type": "string"
}
},
"apply_lora_to_mlp": {
"type": "boolean"
},
"apply_lora_to_output": {
"type": "boolean"
},
"rank": {
"type": "integer"
},
"alpha": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"lora_attn_modules",
"apply_lora_to_mlp",
"apply_lora_to_output",
"rank",
"alpha"
]
},
"LoraFinetuningConfig": {
"type": "object",
"properties": {
"lora_attn_modules": {
"type": "array",
"items": {
"type": "string"
}
},
"apply_lora_to_mlp": {
"type": "boolean"
},
"apply_lora_to_output": {
"type": "boolean"
},
"rank": {
"type": "integer"
},
"alpha": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"lora_attn_modules",
"apply_lora_to_mlp",
"apply_lora_to_output",
"rank",
"alpha"
]
},
"PostTrainingSFTRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
"job_uuid": { "job_uuid": {
@ -2761,66 +3031,10 @@
"$ref": "#/components/schemas/LoraFinetuningConfig" "$ref": "#/components/schemas/LoraFinetuningConfig"
}, },
{ {
"type": "object", "$ref": "#/components/schemas/QLoraFinetuningConfig"
"properties": {
"lora_attn_modules": {
"type": "array",
"items": {
"type": "string"
}
},
"apply_lora_to_mlp": {
"type": "boolean"
},
"apply_lora_to_output": {
"type": "boolean"
},
"rank": {
"type": "integer"
},
"alpha": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"lora_attn_modules",
"apply_lora_to_mlp",
"apply_lora_to_output",
"rank",
"alpha"
]
}, },
{ {
"type": "object", "$ref": "#/components/schemas/DoraFinetuningConfig"
"properties": {
"lora_attn_modules": {
"type": "array",
"items": {
"type": "string"
}
},
"apply_lora_to_mlp": {
"type": "boolean"
},
"apply_lora_to_output": {
"type": "boolean"
},
"rank": {
"type": "integer"
},
"alpha": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"lora_attn_modules",
"apply_lora_to_mlp",
"apply_lora_to_output",
"rank",
"alpha"
]
} }
] ]
}, },
@ -2896,7 +3110,7 @@
], ],
"title": "Request to finetune a model." "title": "Request to finetune a model."
}, },
"LoraFinetuningConfig": { "QLoraFinetuningConfig": {
"type": "object", "type": "object",
"properties": { "properties": {
"lora_attn_modules": { "lora_attn_modules": {
@ -2926,71 +3140,6 @@
"rank", "rank",
"alpha" "alpha"
] ]
},
"OptimizerConfig": {
"type": "object",
"properties": {
"optimizer_type": {
"type": "string",
"enum": [
"adam",
"adamw",
"sgd"
]
},
"lr": {
"type": "number"
},
"lr_min": {
"type": "number"
},
"weight_decay": {
"type": "number"
}
},
"additionalProperties": false,
"required": [
"optimizer_type",
"lr",
"lr_min",
"weight_decay"
]
},
"TrainingConfig": {
"type": "object",
"properties": {
"n_epochs": {
"type": "integer"
},
"batch_size": {
"type": "integer"
},
"shuffle": {
"type": "boolean"
},
"n_iters": {
"type": "integer"
},
"enable_activation_checkpointing": {
"type": "boolean"
},
"memory_efficient_fsdp_wrap": {
"type": "boolean"
},
"fsdp_cpu_offload": {
"type": "boolean"
}
},
"additionalProperties": false,
"required": [
"n_epochs",
"batch_size",
"shuffle",
"n_iters",
"enable_activation_checkpointing",
"memory_efficient_fsdp_wrap",
"fsdp_cpu_offload"
]
} }
}, },
"responses": {} "responses": {}
@ -3001,27 +3150,27 @@
} }
], ],
"tags": [ "tags": [
{
"name": "RewardScoring"
},
{
"name": "MemoryBanks"
},
{
"name": "SyntheticDataGeneration"
},
{
"name": "Finetuning"
},
{ {
"name": "AgenticSystem" "name": "AgenticSystem"
}, },
{
"name": "RewardScoring"
},
{ {
"name": "Inference" "name": "Inference"
}, },
{
"name": "SyntheticDataGeneration"
},
{ {
"name": "Datasets" "name": "Datasets"
}, },
{
"name": "PostTraining"
},
{
"name": "MemoryBanks"
},
{ {
"name": "ShieldConfig", "name": "ShieldConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/ShieldConfig\" />" "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ShieldConfig\" />"
@ -3075,16 +3224,16 @@
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryBank\" />" "description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryBank\" />"
}, },
{ {
"name": "FinetuningJobArtifactsResponse", "name": "PostTrainingJobArtifactsResponse",
"description": "Artifacts of a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningJobArtifactsResponse\" />" "description": "Artifacts of a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingJobArtifactsResponse\" />"
}, },
{ {
"name": "FinetuningJobStatusResponse", "name": "PostTrainingJobStatusResponse",
"description": "Status of a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningJobStatusResponse\" />" "description": "Status of a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingJobStatusResponse\" />"
}, },
{ {
"name": "FinetuningJobLogStream", "name": "PostTrainingJobLogStream",
"description": "Stream of logs from a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningJobLogStream\" />" "description": "Stream of logs from a finetuning job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingJobLogStream\" />"
}, },
{ {
"name": "BatchChatCompletionRequest", "name": "BatchChatCompletionRequest",
@ -3138,6 +3287,22 @@
"name": "SyntheticDataGenerationResponse", "name": "SyntheticDataGenerationResponse",
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/SyntheticDataGenerationResponse\" />" "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/SyntheticDataGenerationResponse\" />"
}, },
{
"name": "DPOAlignmentConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/DPOAlignmentConfig\" />"
},
{
"name": "OptimizerConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/OptimizerConfig\" />"
},
{
"name": "PostTrainingRLHFRequest",
"description": "Request to finetune a model.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingRLHFRequest\" />"
},
{
"name": "TrainingConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/TrainingConfig\" />"
},
{ {
"name": "RewardScoringRequest", "name": "RewardScoringRequest",
"description": "Request to score a reward function. A list of prompts and a list of responses per prompt.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringRequest\" />" "description": "Request to score a reward function. A list of prompts and a list of responses per prompt.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringRequest\" />"
@ -3147,20 +3312,20 @@
"description": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringResponse\" />" "description": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringResponse\" />"
}, },
{ {
"name": "FinetuningTrainRequest", "name": "DoraFinetuningConfig",
"description": "Request to finetune a model.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/FinetuningTrainRequest\" />" "description": "<SchemaDefinition schemaRef=\"#/components/schemas/DoraFinetuningConfig\" />"
}, },
{ {
"name": "LoraFinetuningConfig", "name": "LoraFinetuningConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/LoraFinetuningConfig\" />" "description": "<SchemaDefinition schemaRef=\"#/components/schemas/LoraFinetuningConfig\" />"
}, },
{ {
"name": "OptimizerConfig", "name": "PostTrainingSFTRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/OptimizerConfig\" />" "description": "Request to finetune a model.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingSFTRequest\" />"
}, },
{ {
"name": "TrainingConfig", "name": "QLoraFinetuningConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/TrainingConfig\" />" "description": "<SchemaDefinition schemaRef=\"#/components/schemas/QLoraFinetuningConfig\" />"
} }
], ],
"x-tagGroups": [ "x-tagGroups": [
@ -3169,9 +3334,9 @@
"tags": [ "tags": [
"AgenticSystem", "AgenticSystem",
"Datasets", "Datasets",
"Finetuning",
"Inference", "Inference",
"MemoryBanks", "MemoryBanks",
"PostTraining",
"RewardScoring", "RewardScoring",
"SyntheticDataGeneration" "SyntheticDataGeneration"
] ]
@ -3195,18 +3360,22 @@
"CompletionResponse", "CompletionResponse",
"CompletionResponseStreamChunk", "CompletionResponseStreamChunk",
"CreateDatasetRequest", "CreateDatasetRequest",
"DPOAlignmentConfig",
"Dataset", "Dataset",
"Dialog", "Dialog",
"FinetuningJobArtifactsResponse", "DoraFinetuningConfig",
"FinetuningJobLogStream",
"FinetuningJobStatusResponse",
"FinetuningTrainRequest",
"KScoredPromptGenerations", "KScoredPromptGenerations",
"LoraFinetuningConfig", "LoraFinetuningConfig",
"MemoryBank", "MemoryBank",
"Message", "Message",
"MessageScore", "MessageScore",
"OptimizerConfig", "OptimizerConfig",
"PostTrainingJobArtifactsResponse",
"PostTrainingJobLogStream",
"PostTrainingJobStatusResponse",
"PostTrainingRLHFRequest",
"PostTrainingSFTRequest",
"QLoraFinetuningConfig",
"RewardScoringRequest", "RewardScoringRequest",
"RewardScoringResponse", "RewardScoringResponse",
"ShieldConfig", "ShieldConfig",

View file

@ -879,6 +879,23 @@ components:
- dataset - dataset
title: Request to create a dataset. title: Request to create a dataset.
type: object type: object
DPOAlignmentConfig:
additionalProperties: false
properties:
epsilon:
type: number
gamma:
type: number
reward_clip:
type: number
reward_scale:
type: number
required:
- reward_scale
- reward_clip
- epsilon
- gamma
type: object
Dataset: Dataset:
additionalProperties: false additionalProperties: false
properties: properties:
@ -923,195 +940,27 @@ components:
- message - message
- message_history - message_history
type: object type: object
FinetuningJobArtifactsResponse: DoraFinetuningConfig:
additionalProperties: false additionalProperties: false
properties: properties:
checkpoints: alpha:
items: type: integer
additionalProperties: false apply_lora_to_mlp:
properties: type: boolean
iters: apply_lora_to_output:
type: integer type: boolean
path: lora_attn_modules:
$ref: '#/components/schemas/URL'
required:
- iters
- path
type: object
type: array
job_uuid:
type: string
required:
- job_uuid
- checkpoints
title: Artifacts of a finetuning job.
type: object
FinetuningJobLogStream:
additionalProperties: false
properties:
job_uuid:
type: string
log_lines:
items: items:
type: string type: string
type: array type: array
rank:
type: integer
required: required:
- job_uuid - lora_attn_modules
- log_lines - apply_lora_to_mlp
title: Stream of logs from a finetuning job. - apply_lora_to_output
type: object - rank
FinetuningJobStatusResponse: - alpha
additionalProperties: false
properties:
checkpoints:
items:
additionalProperties: false
properties:
iters:
type: integer
path:
$ref: '#/components/schemas/URL'
required:
- iters
- path
type: object
type: array
completed_at:
format: date-time
type: string
job_uuid:
type: string
resources_allocated:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
scheduled_at:
format: date-time
type: string
started_at:
format: date-time
type: string
status:
enum:
- running
- completed
- failed
- scheduled
type: string
required:
- job_uuid
- status
- checkpoints
title: Status of a finetuning job.
type: object
FinetuningTrainRequest:
additionalProperties: false
properties:
algorithm:
enum:
- full
- lora
- qlora
- dora
type: string
algorithm_config:
oneOf:
- $ref: '#/components/schemas/LoraFinetuningConfig'
- additionalProperties: false
properties:
alpha:
type: integer
apply_lora_to_mlp:
type: boolean
apply_lora_to_output:
type: boolean
lora_attn_modules:
items:
type: string
type: array
rank:
type: integer
required:
- lora_attn_modules
- apply_lora_to_mlp
- apply_lora_to_output
- rank
- alpha
type: object
- additionalProperties: false
properties:
alpha:
type: integer
apply_lora_to_mlp:
type: boolean
apply_lora_to_output:
type: boolean
lora_attn_modules:
items:
type: string
type: array
rank:
type: integer
required:
- lora_attn_modules
- apply_lora_to_mlp
- apply_lora_to_output
- rank
- alpha
type: object
dataset:
$ref: '#/components/schemas/Dataset'
hyperparam_search_config:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
job_uuid:
type: string
logger_config:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
model:
enum:
- llama3_8b
- llama3_70b
type: string
optimizer_config:
$ref: '#/components/schemas/OptimizerConfig'
training_config:
$ref: '#/components/schemas/TrainingConfig'
validation_dataset:
$ref: '#/components/schemas/Dataset'
required:
- job_uuid
- model
- dataset
- validation_dataset
- algorithm
- algorithm_config
- optimizer_config
- training_config
- hyperparam_search_config
- logger_config
title: Request to finetune a model.
type: object type: object
KScoredPromptGenerations: KScoredPromptGenerations:
additionalProperties: false additionalProperties: false
@ -1259,6 +1108,232 @@ components:
- lr_min - lr_min
- weight_decay - weight_decay
type: object type: object
PostTrainingJobArtifactsResponse:
additionalProperties: false
properties:
checkpoints:
items:
additionalProperties: false
properties:
iters:
type: integer
path:
$ref: '#/components/schemas/URL'
required:
- iters
- path
type: object
type: array
job_uuid:
type: string
required:
- job_uuid
- checkpoints
title: Artifacts of a finetuning job.
type: object
PostTrainingJobLogStream:
additionalProperties: false
properties:
job_uuid:
type: string
log_lines:
items:
type: string
type: array
required:
- job_uuid
- log_lines
title: Stream of logs from a finetuning job.
type: object
PostTrainingJobStatusResponse:
additionalProperties: false
properties:
checkpoints:
items:
additionalProperties: false
properties:
iters:
type: integer
path:
$ref: '#/components/schemas/URL'
required:
- iters
- path
type: object
type: array
completed_at:
format: date-time
type: string
job_uuid:
type: string
resources_allocated:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
scheduled_at:
format: date-time
type: string
started_at:
format: date-time
type: string
status:
enum:
- running
- completed
- failed
- scheduled
type: string
required:
- job_uuid
- status
- checkpoints
title: Status of a finetuning job.
type: object
PostTrainingRLHFRequest:
additionalProperties: false
properties:
algorithm:
enum:
- dpo
type: string
algorithm_config:
$ref: '#/components/schemas/DPOAlignmentConfig'
dataset:
$ref: '#/components/schemas/Dataset'
finetuned_model:
$ref: '#/components/schemas/URL'
hyperparam_search_config:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
job_uuid:
type: string
logger_config:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
optimizer_config:
$ref: '#/components/schemas/OptimizerConfig'
training_config:
$ref: '#/components/schemas/TrainingConfig'
validation_dataset:
$ref: '#/components/schemas/Dataset'
required:
- job_uuid
- finetuned_model
- dataset
- validation_dataset
- algorithm
- algorithm_config
- optimizer_config
- training_config
- hyperparam_search_config
- logger_config
title: Request to finetune a model.
type: object
PostTrainingSFTRequest:
additionalProperties: false
properties:
algorithm:
enum:
- full
- lora
- qlora
- dora
type: string
algorithm_config:
oneOf:
- $ref: '#/components/schemas/LoraFinetuningConfig'
- $ref: '#/components/schemas/QLoraFinetuningConfig'
- $ref: '#/components/schemas/DoraFinetuningConfig'
dataset:
$ref: '#/components/schemas/Dataset'
hyperparam_search_config:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
job_uuid:
type: string
logger_config:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
model:
enum:
- llama3_8b
- llama3_70b
type: string
optimizer_config:
$ref: '#/components/schemas/OptimizerConfig'
training_config:
$ref: '#/components/schemas/TrainingConfig'
validation_dataset:
$ref: '#/components/schemas/Dataset'
required:
- job_uuid
- model
- dataset
- validation_dataset
- algorithm
- algorithm_config
- optimizer_config
- training_config
- hyperparam_search_config
- logger_config
title: Request to finetune a model.
type: object
QLoraFinetuningConfig:
additionalProperties: false
properties:
alpha:
type: integer
apply_lora_to_mlp:
type: boolean
apply_lora_to_output:
type: boolean
lora_attn_modules:
items:
type: string
type: array
rank:
type: integer
required:
- lora_attn_modules
- apply_lora_to_mlp
- apply_lora_to_output
- rank
- alpha
type: object
RewardScoringRequest: RewardScoringRequest:
additionalProperties: false additionalProperties: false
properties: properties:
@ -1581,71 +1656,6 @@ paths:
description: OK description: OK
tags: tags:
- Datasets - Datasets
/finetuning/job/artifacts:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/FinetuningJobArtifactsResponse'
description: OK
tags:
- Finetuning
/finetuning/job/logs:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/FinetuningJobLogStream'
description: OK
tags:
- Finetuning
/finetuning/job/status:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/FinetuningJobStatusResponse'
description: OK
tags:
- Finetuning
/finetuning/text_generation/train:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/FinetuningTrainRequest'
required: true
responses:
'200':
description: OK
tags:
- Finetuning
/memory_banks/create: /memory_banks/create:
post: post:
parameters: parameters:
@ -1787,6 +1797,85 @@ paths:
description: OK description: OK
tags: tags:
- MemoryBanks - MemoryBanks
/post_training/job/artifacts:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
description: OK
tags:
- PostTraining
/post_training/job/logs:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/PostTrainingJobLogStream'
description: OK
tags:
- PostTraining
/post_training/job/status:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/PostTrainingJobStatusResponse'
description: OK
tags:
- PostTraining
/post_training/preference_optimize/:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/PostTrainingRLHFRequest'
required: true
responses:
'200':
description: OK
tags:
- PostTraining
/post_training/supervised_fine_tune/:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/PostTrainingSFTRequest'
required: true
responses:
'200':
description: OK
tags:
- PostTraining
/reward_scoring/score: /reward_scoring/score:
post: post:
parameters: [] parameters: []
@ -1828,13 +1917,13 @@ security:
servers: servers:
- url: http://llama.meta.com - url: http://llama.meta.com
tags: tags:
- name: RewardScoring
- name: MemoryBanks
- name: SyntheticDataGeneration
- name: Finetuning
- name: AgenticSystem - name: AgenticSystem
- name: RewardScoring
- name: Inference - name: Inference
- name: SyntheticDataGeneration
- name: Datasets - name: Datasets
- name: PostTraining
- name: MemoryBanks
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" /> - description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
name: ShieldConfig name: ShieldConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest" - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
@ -1888,20 +1977,20 @@ tags:
- description: 'Artifacts of a finetuning job. - description: 'Artifacts of a finetuning job.
<SchemaDefinition schemaRef="#/components/schemas/FinetuningJobArtifactsResponse" <SchemaDefinition schemaRef="#/components/schemas/PostTrainingJobArtifactsResponse"
/>' />'
name: FinetuningJobArtifactsResponse name: PostTrainingJobArtifactsResponse
- description: 'Status of a finetuning job. - description: 'Status of a finetuning job.
<SchemaDefinition schemaRef="#/components/schemas/FinetuningJobStatusResponse" <SchemaDefinition schemaRef="#/components/schemas/PostTrainingJobStatusResponse"
/>' />'
name: FinetuningJobStatusResponse name: PostTrainingJobStatusResponse
- description: 'Stream of logs from a finetuning job. - description: 'Stream of logs from a finetuning job.
<SchemaDefinition schemaRef="#/components/schemas/FinetuningJobLogStream" />' <SchemaDefinition schemaRef="#/components/schemas/PostTrainingJobLogStream" />'
name: FinetuningJobLogStream name: PostTrainingJobLogStream
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest" - description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
/> />
name: BatchChatCompletionRequest name: BatchChatCompletionRequest
@ -1961,6 +2050,19 @@ tags:
<SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationResponse" <SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationResponse"
/>' />'
name: SyntheticDataGenerationResponse name: SyntheticDataGenerationResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/DPOAlignmentConfig"
/>
name: DPOAlignmentConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/OptimizerConfig"
/>
name: OptimizerConfig
- description: 'Request to finetune a model.
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingRLHFRequest" />'
name: PostTrainingRLHFRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/TrainingConfig" />
name: TrainingConfig
- description: 'Request to score a reward function. A list of prompts and a list of - description: 'Request to score a reward function. A list of prompts and a list of
responses per prompt. responses per prompt.
@ -1973,27 +2075,28 @@ tags:
<SchemaDefinition schemaRef="#/components/schemas/RewardScoringResponse" />' <SchemaDefinition schemaRef="#/components/schemas/RewardScoringResponse" />'
name: RewardScoringResponse name: RewardScoringResponse
- description: 'Request to finetune a model. - description: <SchemaDefinition schemaRef="#/components/schemas/DoraFinetuningConfig"
/>
name: DoraFinetuningConfig
<SchemaDefinition schemaRef="#/components/schemas/FinetuningTrainRequest" />'
name: FinetuningTrainRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/LoraFinetuningConfig" - description: <SchemaDefinition schemaRef="#/components/schemas/LoraFinetuningConfig"
/> />
name: LoraFinetuningConfig name: LoraFinetuningConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/OptimizerConfig" - description: 'Request to finetune a model.
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingSFTRequest" />'
name: PostTrainingSFTRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/QLoraFinetuningConfig"
/> />
name: OptimizerConfig name: QLoraFinetuningConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/TrainingConfig" />
name: TrainingConfig
x-tagGroups: x-tagGroups:
- name: Operations - name: Operations
tags: tags:
- AgenticSystem - AgenticSystem
- Datasets - Datasets
- Finetuning
- Inference - Inference
- MemoryBanks - MemoryBanks
- PostTraining
- RewardScoring - RewardScoring
- SyntheticDataGeneration - SyntheticDataGeneration
- name: Types - name: Types
@ -2014,18 +2117,22 @@ x-tagGroups:
- CompletionResponse - CompletionResponse
- CompletionResponseStreamChunk - CompletionResponseStreamChunk
- CreateDatasetRequest - CreateDatasetRequest
- DPOAlignmentConfig
- Dataset - Dataset
- Dialog - Dialog
- FinetuningJobArtifactsResponse - DoraFinetuningConfig
- FinetuningJobLogStream
- FinetuningJobStatusResponse
- FinetuningTrainRequest
- KScoredPromptGenerations - KScoredPromptGenerations
- LoraFinetuningConfig - LoraFinetuningConfig
- MemoryBank - MemoryBank
- Message - Message
- MessageScore - MessageScore
- OptimizerConfig - OptimizerConfig
- PostTrainingJobArtifactsResponse
- PostTrainingJobLogStream
- PostTrainingJobStatusResponse
- PostTrainingRLHFRequest
- PostTrainingSFTRequest
- QLoraFinetuningConfig
- RewardScoringRequest - RewardScoringRequest
- RewardScoringResponse - RewardScoringResponse
- ShieldConfig - ShieldConfig

View file

@ -72,11 +72,13 @@ class LoraFinetuningConfig:
alpha: int alpha: int
@json_schema_type
@dataclass @dataclass
class QLoraFinetuningConfig(LoraFinetuningConfig): class QLoraFinetuningConfig(LoraFinetuningConfig):
pass pass
@json_schema_type
@dataclass @dataclass
class DoraFinetuningConfig(LoraFinetuningConfig): class DoraFinetuningConfig(LoraFinetuningConfig):
pass pass
@ -84,14 +86,14 @@ class DoraFinetuningConfig(LoraFinetuningConfig):
@json_schema_type @json_schema_type
@dataclass @dataclass
class FinetuningJobLogStream: class PostTrainingJobLogStream:
"""Stream of logs from a finetuning job.""" """Stream of logs from a finetuning job."""
job_uuid: str job_uuid: str
log_lines: List[str] log_lines: List[str]
class FinetuningJobStatus(Enum): class PostTrainingJobStatus(Enum):
running = "running" running = "running"
completed = "completed" completed = "completed"
failed = "failed" failed = "failed"
@ -102,3 +104,16 @@ class FinetuningJobStatus(Enum):
class Checkpoint: class Checkpoint:
iters: int iters: int
path: URL path: URL
class RLHFAlgorithm(Enum):
dpo = "dpo"
@json_schema_type
@dataclass
class DPOAlignmentConfig:
reward_scale: float
reward_clip: float
epsilon: float
gamma: float