temp commit

This commit is contained in:
Botao Chen 2024-11-26 21:23:56 -08:00
parent 90add9fed0
commit c31a78dfcb
7 changed files with 92 additions and 19 deletions

View file

@ -16,7 +16,8 @@ from pydantic import BaseModel, Field
from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_stack.apis.datasets import * # noqa: F403 from llama_stack.apis.datasets import * # noqa: F403
from llama_stack.apis.common.training_types import * # noqa: F403 from llama_stack.apis.common.training_types import * # noqa: F403
import torch
# import torch
class OptimizerType(Enum): class OptimizerType(Enum):
@ -36,7 +37,7 @@ class OptimizerConfig(BaseModel):
@json_schema_type @json_schema_type
class TrainingConfig(BaseModel): class TrainingConfig(BaseModel):
dtype: torch.dtype dtype: str
n_epochs: int n_epochs: int
max_steps_per_epoch: int max_steps_per_epoch: int
gradient_accumulation_steps: int gradient_accumulation_steps: int
@ -116,9 +117,7 @@ class PostTrainingSFTRequest(BaseModel):
validation_dataset_id: str validation_dataset_id: str
algorithm: FinetuningAlgorithm algorithm: FinetuningAlgorithm
algorithm_config: Union[ algorithm_config: LoraFinetuningConfig
LoraFinetuningConfig, QLoraFinetuningConfig, DoraFinetuningConfig
]
optimizer_config: OptimizerConfig optimizer_config: OptimizerConfig
training_config: TrainingConfig training_config: TrainingConfig
@ -189,9 +188,7 @@ class PostTraining(Protocol):
dataset_id: str, dataset_id: str,
validation_dataset_id: str, validation_dataset_id: str,
algorithm: FinetuningAlgorithm, algorithm: FinetuningAlgorithm,
algorithm_config: Union[ algorithm_config: LoraFinetuningConfig,
LoraFinetuningConfig, QLoraFinetuningConfig, DoraFinetuningConfig
],
optimizer_config: OptimizerConfig, optimizer_config: OptimizerConfig,
training_config: TrainingConfig, training_config: TrainingConfig,
hyperparam_search_config: Dict[str, Any], hyperparam_search_config: Dict[str, Any],
@ -206,7 +203,7 @@ class PostTraining(Protocol):
dataset_id: str, dataset_id: str,
validation_dataset_id: str, validation_dataset_id: str,
algorithm: RLHFAlgorithm, algorithm: RLHFAlgorithm,
algorithm_config: Union[DPOAlignmentConfig], algorithm_config: DPOAlignmentConfig,
optimizer_config: OptimizerConfig, optimizer_config: OptimizerConfig,
training_config: TrainingConfig, training_config: TrainingConfig,
hyperparam_search_config: Dict[str, Any], hyperparam_search_config: Dict[str, Any],

View file

@ -24,6 +24,7 @@ from llama_stack.apis.inspect import Inspect
from llama_stack.apis.memory import Memory from llama_stack.apis.memory import Memory
from llama_stack.apis.memory_banks import MemoryBanks from llama_stack.apis.memory_banks import MemoryBanks
from llama_stack.apis.models import Models from llama_stack.apis.models import Models
from llama_stack.apis.post_training import PostTraining
from llama_stack.apis.safety import Safety from llama_stack.apis.safety import Safety
from llama_stack.apis.scoring import Scoring from llama_stack.apis.scoring import Scoring
from llama_stack.apis.scoring_functions import ScoringFunctions from llama_stack.apis.scoring_functions import ScoringFunctions
@ -58,6 +59,7 @@ def api_protocol_map() -> Dict[Api, Any]:
Api.scoring_functions: ScoringFunctions, Api.scoring_functions: ScoringFunctions,
Api.eval: Eval, Api.eval: Eval,
Api.eval_tasks: EvalTasks, Api.eval_tasks: EvalTasks,
Api.post_training: PostTraining,
} }

View file

@ -8,6 +8,7 @@ from llama_stack.providers.inline.post_training.meta_reference.config import (
MetaReferencePostTrainingConfig, MetaReferencePostTrainingConfig,
) )
from llama_stack.apis.post_training import * # noqa from llama_stack.apis.post_training import * # noqa
from llama_stack.providers.inline.post_training.meta_reference.recipes.lora_finetuning_single_device import ( from llama_stack.providers.inline.post_training.meta_reference.recipes.lora_finetuning_single_device import (
LoraFinetuningSingleDevice, LoraFinetuningSingleDevice,
) )
@ -20,17 +21,45 @@ class MetaReferencePostTrainingImpl:
self.config = config self.config = config
self.datasetio_api = datasetio_api self.datasetio_api = datasetio_api
LoraFinetuningConfig(
lora_attn_modules=["q_proj", "v_proj", "output_proj"],
apply_lora_to_mlp=True,
apply_lora_to_output=False,
rank=8,
alpha=16,
)
OptimizerConfig(
lr=3e-4,
lr_min=3e-5,
weight_decay=0.1,
num_warmup_steps=100,
)
TrainingConfig(
dtype="bf16",
n_epochs=1,
max_steps_per_epoch=10,
gradient_accumulation_steps=1,
batch_size=1,
shuffle=1,
enable_activation_checkpointing=False,
memory_efficient_fsdp_wrap=False,
fsdp_cpu_offload=False,
)
def supervised_fine_tune( def supervised_fine_tune(
self, self,
job_uuid: str, job_uuid: str = "1234",
model: str, model: str = " meta-llama/Llama-3.2-3B-Instruct",
dataset_id: str, dataset_id: str = "alpaca",
validation_dataset_id: str, validation_dataset_id: str = "alpaca",
algorithm: FinetuningAlgorithm, algorithm: FinetuningAlgorithm = FinetuningAlgorithm.lora,
algorithm_config: LoraFinetuningConfig, algorithm_config: LoraFinetuningConfig = LoraFinetuningConfig,
optimizer_config: OptimizerConfig, optimizer_config: OptimizerConfig = OptimizerConfig,
training_config: TrainingConfig, training_config: TrainingConfig = TrainingConfig,
logger_config: Dict[str, Any], hyperparam_search_config: Dict[str, Any] = {},
logger_config: Dict[str, Any] = {},
) -> PostTrainingJob: ) -> PostTrainingJob:
# wrapper request to make it easier to pass around (internal only, not exposed to API) # wrapper request to make it easier to pass around (internal only, not exposed to API)
request = PostTrainingSFTRequest( request = PostTrainingSFTRequest(
@ -54,3 +83,36 @@ class MetaReferencePostTrainingImpl:
raise NotImplementedError() raise NotImplementedError()
return PostTrainingJob(job_uuid=job_uuid) return PostTrainingJob(job_uuid=job_uuid)
def preference_optimize(
self,
job_uuid: str,
finetuned_model: URL,
dataset_id: str,
validation_dataset_id: str,
algorithm: RLHFAlgorithm,
algorithm_config: DPOAlignmentConfig,
optimizer_config: OptimizerConfig,
training_config: TrainingConfig,
hyperparam_search_config: Dict[str, Any],
logger_config: Dict[str, Any],
) -> PostTrainingJob: ...
def get_training_jobs(self) -> List[PostTrainingJob]: ...
# sends SSE stream of logs
@webmethod(route="/post-training/job/logs")
def get_training_job_logstream(self, job_uuid: str) -> PostTrainingJobLogStream: ...
@webmethod(route="/post-training/job/status")
def get_training_job_status(
self, job_uuid: str
) -> PostTrainingJobStatusResponse: ...
@webmethod(route="/post-training/job/cancel")
def cancel_training_job(self, job_uuid: str) -> None: ...
@webmethod(route="/post-training/job/artifacts")
def get_training_job_artifacts(
self, job_uuid: str
) -> PostTrainingJobArtifactsResponse: ...

View file

@ -38,7 +38,7 @@ from torchtune.modules.peft import (
set_trainable_params, set_trainable_params,
validate_missing_and_unexpected_for_lora, validate_missing_and_unexpected_for_lora,
) )
from torchtune.training.lr_scheduler import get_cosine_schedule_with_warmup from torchtune.training.lr_schedulers import get_cosine_schedule_with_warmup
log = logging.getLogger(__name__) log = logging.getLogger(__name__)

View file

@ -12,6 +12,7 @@ from llama_stack.distribution.datatypes import * # noqa: F403
META_REFERENCE_DEPS = [ META_REFERENCE_DEPS = [
"torch", "torch",
"torchtune", "torchtune",
"torchao",
"numpy", "numpy",
] ]
@ -24,5 +25,8 @@ def available_providers() -> List[ProviderSpec]:
pip_packages=META_REFERENCE_DEPS, pip_packages=META_REFERENCE_DEPS,
module="llama_stack.providers.inline.post_training.meta_reference", module="llama_stack.providers.inline.post_training.meta_reference",
config_class="llama_stack.providers.inline.post_training.meta_reference.MetaReferencePostTrainingConfig", config_class="llama_stack.providers.inline.post_training.meta_reference.MetaReferencePostTrainingConfig",
api_dependencies=[
Api.datasetio,
],
), ),
] ]

View file

@ -6,6 +6,8 @@ distribution_spec:
providers: providers:
post_training: post_training:
- inline::meta-reference - inline::meta-reference
datasetio:
- remote::huggingface
inference: inference:
- inline::meta-reference - inline::meta-reference
memory: memory:

View file

@ -8,6 +8,8 @@ apis:
- memory - memory
- safety - safety
- telemetry - telemetry
- datasetio
- post_training
providers: providers:
inference: inference:
- provider_id: meta-reference-inference - provider_id: meta-reference-inference
@ -16,6 +18,10 @@ providers:
model: ${env.INFERENCE_MODEL} model: ${env.INFERENCE_MODEL}
max_seq_len: 4096 max_seq_len: 4096
checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null} checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
datasetio:
- provider_id: huggingface-0
provider_type: remote::huggingface
config: {}
memory: memory:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss