torchtune: save job status on schedule

Otherwise the status of a job that fails is never registered, so it cannot
be retrieved later even though the job is still present in jobs_list.

Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
This commit is contained in:
Ihar Hrachyshka 2025-02-18 09:26:56 -05:00
parent 80070b966a
commit 07a1d44f4c

View file

@@ -65,6 +65,7 @@ class TorchtunePostTrainingImpl:
status=JobStatus.scheduled,
scheduled_at=datetime.now(),
)
self.jobs_status[job_uuid] = job_status_response
self.jobs_list.append(post_training_job)
if isinstance(algorithm_config, LoraFinetuningConfig):
@@ -100,8 +101,6 @@ class TorchtunePostTrainingImpl:
else:
raise NotImplementedError()
self.jobs_status[job_uuid] = job_status_response
return post_training_job
async def preference_optimize(