Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-08-16 14:38:00 +00:00
What does this PR do?

This PR adds support for Direct Preference Optimization (DPO) training via the existing HuggingFace inline provider. It introduces a new DPO training recipe, config schema updates, dataset integration, and end-to-end testing to support preference-based fine-tuning with TRL.

Test Plan

Added integration test: tests/integration/post_training/test_post_training.py::TestPostTraining::test_preference_optimize

Ran tests on both CPU and CUDA environments

---------

Co-authored-by: Ubuntu <ubuntu@ip-172-31-43-83.ec2.internal>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
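As a rough sketch of how a client might exercise the new capability (not taken from this PR): the llama-stack Python client exposes post_training.preference_optimize, and something like the following should kick off a DPO job. The model id, dataset id, and the exact shapes of algorithm_config and training_config below are assumptions for illustration only.

# Hypothetical usage sketch; nested field shapes are assumed, not taken from this PR.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

job = client.post_training.preference_optimize(
    job_uuid="dpo-demo-001",  # caller-chosen job identifier
    finetuned_model="meta-llama/Llama-3.2-1B-Instruct",  # placeholder model id
    algorithm_config={  # assumed DPO knobs, mirroring the provider config below
        "beta": 0.1,
        "loss_type": "sigmoid",
    },
    training_config={  # assumed shape
        "n_epochs": 1,
        "data_config": {
            "dataset_id": "my_preference_dataset",  # placeholder preference dataset
            "batch_size": 1,
            "shuffle": True,
        },
    },
    hyperparam_search_config={},
    logger_config={},
)
print(job)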
78 lines · 2.9 KiB · Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any, Literal

from pydantic import BaseModel


class HuggingFacePostTrainingConfig(BaseModel):
    # Device to run training on (cuda, cpu, mps)
    device: str = "cuda"

    # Distributed training backend if using multiple devices
    # fsdp: Fully Sharded Data Parallel
    # deepspeed: DeepSpeed ZeRO optimization
    distributed_backend: Literal["fsdp", "deepspeed"] | None = None

    # Format for saving model checkpoints
    # full_state: Save complete model state
    # huggingface: Save in HuggingFace format (recommended for compatibility)
    checkpoint_format: Literal["full_state", "huggingface"] | None = "huggingface"

    # Template for formatting chat inputs and outputs
    # Used to structure the conversation format for training
    chat_template: str = "<|user|>\n{input}\n<|assistant|>\n{output}"
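    # Illustrative example: with the default template,
    # chat_template.format(input="Hi", output="Hello!") yields
    # "<|user|>\nHi\n<|assistant|>\nHello!"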

    # Model-specific configuration parameters
    # trust_remote_code: Allow execution of custom model code
    # attn_implementation: Use SDPA (Scaled Dot Product Attention) for better performance
    model_specific_config: dict = {
        "trust_remote_code": True,
        "attn_implementation": "sdpa",
    }

    # Maximum sequence length for training
    # Set to 2048 as this is the maximum that works reliably on MPS (Apple Silicon)
    # Longer sequences may cause memory issues on MPS devices
    max_seq_length: int = 2048

    # Enable gradient checkpointing to reduce memory usage
    # Trades computation for memory by recomputing activations
    gradient_checkpointing: bool = False

    # Maximum number of checkpoints to keep
    # Older checkpoints are deleted when this limit is reached
    save_total_limit: int = 3

    # Number of training steps between logging updates
    logging_steps: int = 10

    # Ratio of training steps used for learning rate warmup
    # Helps stabilize early training
    warmup_ratio: float = 0.1

    # L2 regularization coefficient
    # Helps prevent overfitting
    weight_decay: float = 0.01

    # Number of worker processes for data loading
    # Higher values can improve data loading speed but increase memory usage
    dataloader_num_workers: int = 4

    # Whether to pin memory in data loader
    # Can improve data transfer speed to GPU but uses more memory
    dataloader_pin_memory: bool = True

    # DPO-specific parameters
    dpo_beta: float = 0.1
    use_reference_model: bool = True
    dpo_loss_type: Literal["sigmoid", "hinge", "ipo", "kto_pair"] = "sigmoid"
    dpo_output_dir: str = "./checkpoints/dpo"

    @classmethod
    def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
        return {"checkpoint_format": "huggingface", "distributed_backend": None, "device": "cpu"}
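The class above is plain configuration; the DPO recipe added by this PR consumes it elsewhere in the provider. As a minimal sketch only, assuming a recent TRL release where DPOConfig carries beta, loss_type, and max_length, here is one plausible way these fields could be forwarded to a trainer. The tiny model id and the preference_ds dataset are placeholders; the provider's actual recipe may wire things differently.

# Illustrative sketch, not the provider's actual DPO recipe.
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

cfg = HuggingFacePostTrainingConfig(device="cpu", dpo_beta=0.2)

model_id = "sshleifer/tiny-gpt2"  # placeholder model for illustration
model = AutoModelForCausalLM.from_pretrained(model_id, **cfg.model_specific_config)
ref_model = AutoModelForCausalLM.from_pretrained(model_id) if cfg.use_reference_model else None
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token by default

# Tiny placeholder preference dataset in TRL's prompt/chosen/rejected layout.
preference_ds = Dataset.from_dict(
    {
        "prompt": ["<|user|>\nHi\n<|assistant|>\n"],
        "chosen": ["Hello!"],
        "rejected": ["Go away."],
    }
)

# Map the provider config onto TRL training arguments.
args = DPOConfig(
    output_dir=cfg.dpo_output_dir,
    beta=cfg.dpo_beta,
    loss_type=cfg.dpo_loss_type,
    max_length=cfg.max_seq_length,
    logging_steps=cfg.logging_steps,
    warmup_ratio=cfg.warmup_ratio,
    weight_decay=cfg.weight_decay,
    save_total_limit=cfg.save_total_limit,
    gradient_checkpointing=cfg.gradient_checkpointing,
    dataloader_num_workers=cfg.dataloader_num_workers,
    dataloader_pin_memory=cfg.dataloader_pin_memory,
)

trainer = DPOTrainer(
    model=model,
    ref_model=ref_model,  # None disables the frozen reference copy
    args=args,
    train_dataset=preference_ds,
    processing_class=tokenizer,
)
trainer.train()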