chore(misc): make tests and starter faster (#3042)

A bunch of miscellaneous cleanup focusing on tests, but ended up speeding up starter distro substantially. - Pulled llama stack client init for tests into `pytest_sessionstart` so it does not clobber output - Profiling of that told me where we were doing lots of heavy imports for starter, so lazied them - starter now starts 20seconds+ faster on my Mac - A few other smallish refactors for `compat_client`
2025-12-03 18:00:36 +00:00 · 2025-08-05 14:55:05 -07:00 · 2025-08-05 14:55:05 -07:00 · 7f834339ba
commit 7f834339ba
parent e12524af85
45 changed files with 2897 additions and 1688 deletions
--- a/llama_stack/providers/inline/post_training/torchtune/post_training.py
+++ b/llama_stack/providers/inline/post_training/torchtune/post_training.py
@ -23,12 +23,8 @@ from llama_stack.apis.post_training import (
 from llama_stack.providers.inline.post_training.torchtune.config import (
    TorchtunePostTrainingConfig,
 )
-from llama_stack.providers.inline.post_training.torchtune.recipes.lora_finetuning_single_device import (
-    LoraFinetuningSingleDevice,
-)
 from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
 from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
-from llama_stack.schema_utils import webmethod


 class TrainingArtifactType(Enum):
@ -84,6 +80,10 @@ class TorchtunePostTrainingImpl:
        if isinstance(algorithm_config, LoraFinetuningConfig):

            async def handler(on_log_message_cb, on_status_change_cb, on_artifact_collected_cb):
+                from llama_stack.providers.inline.post_training.torchtune.recipes.lora_finetuning_single_device import (
+                    LoraFinetuningSingleDevice,
+                )
+
                on_log_message_cb("Starting Lora finetuning")

                recipe = LoraFinetuningSingleDevice(
@ -144,7 +144,6 @@ class TorchtunePostTrainingImpl:
        data = cls._get_artifacts_metadata_by_type(job, TrainingArtifactType.RESOURCES_STATS.value)
        return data[0] if data else None

-    @webmethod(route="/post-training/job/status")
    async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse | None:
        job = self._scheduler.get_job(job_uuid)

@ -171,11 +170,9 @@ class TorchtunePostTrainingImpl:
            resources_allocated=self._get_resources_allocated(job),
        )

-    @webmethod(route="/post-training/job/cancel")
    async def cancel_training_job(self, job_uuid: str) -> None:
        self._scheduler.cancel(job_uuid)

-    @webmethod(route="/post-training/job/artifacts")
    async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse | None:
        job = self._scheduler.get_job(job_uuid)
        return PostTrainingJobArtifactsResponse(job_uuid=job_uuid, checkpoints=self._get_checkpoints(job))