diff --git a/docs/source/providers/post_training/index.md b/docs/source/providers/post_training/index.md
index 5ada6f9aa..e69f2a45a 100644
--- a/docs/source/providers/post_training/index.md
+++ b/docs/source/providers/post_training/index.md
@@ -9,7 +9,6 @@ This section contains documentation for all available providers for the **post_t
 ```{toctree}
 :maxdepth: 1

-inline_huggingface-cpu
 inline_huggingface-gpu
 inline_torchtune-cpu
 inline_torchtune-gpu
diff --git a/llama_stack/core/build.py b/llama_stack/core/build.py
index fa1fe632b..2ceb9e9be 100644
--- a/llama_stack/core/build.py
+++ b/llama_stack/core/build.py
@@ -80,7 +80,7 @@ def get_provider_dependencies(
     normal_deps = []
     special_deps = []
     for package in deps:
-        if "--no-deps" in package or "--index-url" in package:
+        if any(f in package for f in ["--no-deps", "--index-url", "--extra-index-url"]):
             special_deps.append(package)
         else:
             normal_deps.append(package)
diff --git a/llama_stack/distributions/ci-tests/build.yaml b/llama_stack/distributions/ci-tests/build.yaml
index b4701cb81..8e6c0bf67 100644
--- a/llama_stack/distributions/ci-tests/build.yaml
+++ b/llama_stack/distributions/ci-tests/build.yaml
@@ -34,7 +34,7 @@ distribution_spec:
     telemetry:
     - provider_type: inline::meta-reference
     post_training:
-    - provider_type: inline::huggingface-cpu
+    - provider_type: inline::torchtune-cpu
     eval:
     - provider_type: inline::meta-reference
     datasetio:
diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml
index 3acdd20f9..7523df581 100644
--- a/llama_stack/distributions/ci-tests/run.yaml
+++ b/llama_stack/distributions/ci-tests/run.yaml
@@ -156,13 +156,10 @@ providers:
       sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db
       otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   post_training:
-  - provider_id: huggingface-cpu
-    provider_type: inline::huggingface-cpu
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
     config:
-      checkpoint_format: huggingface
-      distributed_backend: null
-      device: cpu
-      dpo_output_dir: ~/.llama/distributions/ci-tests/dpo_output
+      checkpoint_format: meta
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
diff --git a/llama_stack/distributions/starter-gpu/build.yaml b/llama_stack/distributions/starter-gpu/build.yaml
index ae0680cdc..ff7c58e6f 100644
--- a/llama_stack/distributions/starter-gpu/build.yaml
+++ b/llama_stack/distributions/starter-gpu/build.yaml
@@ -35,7 +35,7 @@ distribution_spec:
     telemetry:
     - provider_type: inline::meta-reference
     post_training:
-    - provider_type: inline::torchtune-gpu
+    - provider_type: inline::huggingface-gpu
     eval:
     - provider_type: inline::meta-reference
     datasetio:
diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml
index 81c802317..8aed61519 100644
--- a/llama_stack/distributions/starter-gpu/run.yaml
+++ b/llama_stack/distributions/starter-gpu/run.yaml
@@ -156,10 +156,13 @@ providers:
       sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/trace_store.db
       otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   post_training:
-  - provider_id: torchtune-gpu
-    provider_type: inline::torchtune-gpu
+  - provider_id: huggingface-gpu
+    provider_type: inline::huggingface-gpu
     config:
-      checkpoint_format: meta
+      checkpoint_format: huggingface
+      distributed_backend: null
+      device: cpu
+      dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
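Note: the get_provider_dependencies change in llama_stack/core/build.py above widens the set of requirement strings that are routed into special_deps instead of the normal dependency list. A standalone sketch of that filter follows; the split_deps helper name is illustrative, not part of the codebase:

    SPECIAL_FLAGS = ["--no-deps", "--index-url", "--extra-index-url"]

    def split_deps(deps: list[str]) -> tuple[list[str], list[str]]:
        """Separate plain requirements from lines carrying pip flags."""
        normal_deps: list[str] = []
        special_deps: list[str] = []
        for package in deps:
            # A pip flag anywhere in the line means it must reach pip verbatim.
            if any(f in package for f in SPECIAL_FLAGS):
                special_deps.append(package)
            else:
                normal_deps.append(package)
        return normal_deps, special_deps

    normal, special = split_deps([
        "numpy",
        "sentence-transformers --no-deps",
        "torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
    ])
    assert normal == ["numpy"] and len(special) == 2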
diff --git a/llama_stack/distributions/starter-gpu/starter_gpu.py b/llama_stack/distributions/starter-gpu/starter_gpu.py
index 893df6c17..245334749 100644
--- a/llama_stack/distributions/starter-gpu/starter_gpu.py
+++ b/llama_stack/distributions/starter-gpu/starter_gpu.py
@@ -17,6 +17,6 @@ def get_distribution_template() -> DistributionTemplate:
     template.description = "Quick start template for running Llama Stack with several popular providers. This distribution is intended for GPU-enabled environments."

     template.providers["post_training"] = [
-        BuildProvider(provider_type="inline::torchtune-gpu"),
+        BuildProvider(provider_type="inline::huggingface-gpu"),
     ]
     return template
diff --git a/llama_stack/distributions/starter/build.yaml b/llama_stack/distributions/starter/build.yaml
index 3df0eb129..e84e528da 100644
--- a/llama_stack/distributions/starter/build.yaml
+++ b/llama_stack/distributions/starter/build.yaml
@@ -35,7 +35,7 @@ distribution_spec:
     telemetry:
     - provider_type: inline::meta-reference
     post_training:
-    - provider_type: inline::huggingface-cpu
+    - provider_type: inline::torchtune-cpu
     eval:
     - provider_type: inline::meta-reference
     datasetio:
diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml
index 7e1d46a61..a3962b8aa 100644
--- a/llama_stack/distributions/starter/run.yaml
+++ b/llama_stack/distributions/starter/run.yaml
@@ -156,13 +156,10 @@ providers:
       sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db
      otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   post_training:
-  - provider_id: huggingface-cpu
-    provider_type: inline::huggingface-cpu
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
     config:
-      checkpoint_format: huggingface
-      distributed_backend: null
-      device: cpu
-      dpo_output_dir: ~/.llama/distributions/starter/dpo_output
+      checkpoint_format: meta
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
diff --git a/llama_stack/distributions/starter/starter.py b/llama_stack/distributions/starter/starter.py
index f49da0bb7..a4bbc6371 100644
--- a/llama_stack/distributions/starter/starter.py
+++ b/llama_stack/distributions/starter/starter.py
@@ -120,7 +120,7 @@ def get_distribution_template() -> DistributionTemplate:
         ],
         "agents": [BuildProvider(provider_type="inline::meta-reference")],
         "telemetry": [BuildProvider(provider_type="inline::meta-reference")],
-        "post_training": [BuildProvider(provider_type="inline::huggingface-cpu")],
+        "post_training": [BuildProvider(provider_type="inline::torchtune-cpu")],
         "eval": [BuildProvider(provider_type="inline::meta-reference")],
         "datasetio": [
             BuildProvider(provider_type="remote::huggingface"),
diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py
index 1801cdcad..82b771a28 100644
--- a/llama_stack/providers/registry/inference.py
+++ b/llama_stack/providers/registry/inference.py
@@ -40,8 +40,9 @@ def available_providers() -> list[ProviderSpec]:
         InlineProviderSpec(
             api=Api.inference,
             provider_type="inline::sentence-transformers",
+            # CrossEncoder depends on torchao.quantization
             pip_packages=[
-                "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
+                "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
                 "sentence-transformers --no-deps",
             ],
             module="llama_stack.providers.inline.inference.sentence_transformers",
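Note: --index-url replaces PyPI as pip's package index, while --extra-index-url adds the PyTorch CPU index alongside PyPI, so packages absent from the CPU index can still resolve from PyPI. Assuming these special dependency lines are handed to pip verbatim by the build scripts, the CPU torchtune entry below expands roughly as in this sketch; install_special_dep is a hypothetical helper, not llama-stack code:

    import shlex
    import subprocess
    import sys

    def install_special_dep(dep_line: str) -> None:
        # Pass the whole line, flags included, straight through to pip.
        cmd = [sys.executable, "-m", "pip", "install", *shlex.split(dep_line)]
        subprocess.run(cmd, check=True)

    # Roughly equivalent to:
    #   pip install torch torchtune>=0.5.0 torchao>=0.12.0 \
    #       --extra-index-url https://download.pytorch.org/whl/cpu
    install_special_dep(
        "torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"
    )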
diff --git a/llama_stack/providers/registry/post_training.py b/llama_stack/providers/registry/post_training.py
index 4443f4df1..67238e3fc 100644
--- a/llama_stack/providers/registry/post_training.py
+++ b/llama_stack/providers/registry/post_training.py
@@ -13,7 +13,7 @@ from llama_stack.providers.datatypes import AdapterSpec, Api, InlineProviderSpec
 # The CPU version is used for distributions that don't have GPU support -- they result in smaller container images.
 torchtune_def = dict(
     api=Api.post_training,
-    pip_packages=["torchtune==0.5.0", "torchao==0.8.0", "numpy"],
+    pip_packages=["numpy"],
     module="llama_stack.providers.inline.post_training.torchtune",
     config_class="llama_stack.providers.inline.post_training.torchtune.TorchtunePostTrainingConfig",
     api_dependencies=[
@@ -23,56 +23,39 @@ torchtune_def = dict(
     description="TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.",
 )

-huggingface_def = dict(
-    api=Api.post_training,
-    pip_packages=["trl", "transformers", "peft", "datasets"],
-    module="llama_stack.providers.inline.post_training.huggingface",
-    config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
-    api_dependencies=[
-        Api.datasetio,
-        Api.datasets,
-    ],
-    description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
-)
-

 def available_providers() -> list[ProviderSpec]:
     return [
         InlineProviderSpec(
-            **{
+            **{  # type: ignore
                 **torchtune_def,
                 "provider_type": "inline::torchtune-cpu",
                 "pip_packages": (
                     cast(list[str], torchtune_def["pip_packages"])
-                    + ["torch torchtune==0.5.0 torchao==0.8.0 --index-url https://download.pytorch.org/whl/cpu"]
+                    + ["torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"]
                 ),
             },
         ),
         InlineProviderSpec(
-            **{
-                **huggingface_def,
-                "provider_type": "inline::huggingface-cpu",
-                "pip_packages": (
-                    cast(list[str], huggingface_def["pip_packages"])
-                    + ["torch --index-url https://download.pytorch.org/whl/cpu"]
-                ),
-            },
-        ),
-        InlineProviderSpec(
-            **{
+            **{  # type: ignore
                 **torchtune_def,
                 "provider_type": "inline::torchtune-gpu",
                 "pip_packages": (
-                    cast(list[str], torchtune_def["pip_packages"]) + ["torch torchtune==0.5.0 torchao==0.8.0"]
+                    cast(list[str], torchtune_def["pip_packages"]) + ["torch torchtune>=0.5.0 torchao>=0.12.0"]
                 ),
             },
         ),
         InlineProviderSpec(
-            **{
-                **huggingface_def,
-                "provider_type": "inline::huggingface-gpu",
-                "pip_packages": (cast(list[str], huggingface_def["pip_packages"]) + ["torch"]),
-            },
+            api=Api.post_training,
+            provider_type="inline::huggingface-gpu",
+            pip_packages=["trl", "transformers", "peft", "datasets", "torch"],
+            module="llama_stack.providers.inline.post_training.huggingface",
+            config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
+            api_dependencies=[
+                Api.datasetio,
+                Api.datasets,
+            ],
+            description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
         ),
         remote_provider_spec(
             api=Api.post_training,
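Note: the registry hunk above keeps a single torchtune_def base and derives the CPU and GPU variants by dict spreading with per-variant overrides, with # type: ignore silencing mypy on the heterogeneous **{...} unpacking. A minimal sketch of that pattern under illustrative names; base_def and variants are not the real ProviderSpec datatypes:

    from typing import cast

    # Shared base definition; each variant overrides provider_type and
    # extends pip_packages.
    base_def = dict(
        provider_type="",
        pip_packages=["numpy"],
    )

    variants = [
        {
            **base_def,
            "provider_type": "inline::torchtune-cpu",
            "pip_packages": cast(list[str], base_def["pip_packages"])
            + ["torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"],
        },
        {
            **base_def,
            "provider_type": "inline::torchtune-gpu",
            "pip_packages": cast(list[str], base_def["pip_packages"])
            + ["torch torchtune>=0.5.0 torchao>=0.12.0"],
        },
    ]

    for v in variants:
        print(v["provider_type"], "->", v["pip_packages"])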