feat(distro): no huggingface provider for starter (#3258)

The `trl` dependency brings in `accelerate`, which in turn brings in NVIDIA CUDA dependencies for torch. We cannot have that in the starter distro. As such, there is no CPU-only post-training for the huggingface provider: the CPU-only `starter` and `ci-tests` distributions switch to `inline::torchtune-cpu`, and `inline::huggingface-gpu` moves to the `starter-gpu` distribution.
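
This chain can be verified from package metadata alone; a minimal sketch using only the standard library, assuming `trl` is installed in the current environment:

```python
# Walk the declared requirements to confirm trl -> accelerate -> torch.
from importlib.metadata import requires

print(requires("trl"))         # "accelerate" appears among trl's requirements
print(requires("accelerate"))  # "torch" appears here; its default Linux wheels pull in nvidia-* packages
```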
Author: Ashwin Bharambe (committed via GitHub)
Date: 2025-08-26 14:06:36 -07:00
Commit: 9fa69b0337
Parent: 00bd9a61ed
12 changed files with 35 additions and 55 deletions


```diff
@@ -9,7 +9,6 @@ This section contains documentation for all available providers for the **post_t
 ```{toctree}
 :maxdepth: 1
 
-inline_huggingface-cpu
 inline_huggingface-gpu
 inline_torchtune-cpu
 inline_torchtune-gpu
```


```diff
@@ -80,7 +80,7 @@ def get_provider_dependencies(
     normal_deps = []
     special_deps = []
     for package in deps:
-        if "--no-deps" in package or "--index-url" in package:
+        if any(f in package for f in ["--no-deps", "--index-url", "--extra-index-url"]):
             special_deps.append(package)
         else:
             normal_deps.append(package)
```
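
To make the effect of this change concrete, here is the same classification logic lifted into a standalone function (the name `split_deps` is illustrative, not from the codebase):

```python
# Mirror of the classification above: lines carrying pip flags are "special"
# and must be installed in a separate pip invocation.
def split_deps(deps: list[str]) -> tuple[list[str], list[str]]:
    normal_deps: list[str] = []
    special_deps: list[str] = []
    for package in deps:
        if any(f in package for f in ["--no-deps", "--index-url", "--extra-index-url"]):
            special_deps.append(package)
        else:
            normal_deps.append(package)
    return normal_deps, special_deps


normal, special = split_deps(
    [
        "numpy",
        "torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
        "sentence-transformers --no-deps",
    ]
)
assert normal == ["numpy"]
assert len(special) == 2
```

Note that `"--index-url" in package` is plain substring matching and does not match `--extra-index-url` (there the flag name is preceded by `a-`, not `--`), which is why the new branch lists the flag explicitly.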


```diff
@@ -34,7 +34,7 @@ distribution_spec:
     telemetry:
     - provider_type: inline::meta-reference
     post_training:
-    - provider_type: inline::huggingface-cpu
+    - provider_type: inline::torchtune-cpu
     eval:
     - provider_type: inline::meta-reference
     datasetio:
```


```diff
@@ -156,13 +156,10 @@ providers:
       sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db
       otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   post_training:
-  - provider_id: huggingface-cpu
-    provider_type: inline::huggingface-cpu
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
     config:
-      checkpoint_format: huggingface
-      distributed_backend: null
-      device: cpu
-      dpo_output_dir: ~/.llama/distributions/ci-tests/dpo_output
+      checkpoint_format: meta
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
```
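
As a sanity check, the new provider block parses to the structure the run config expects; a small sketch assuming PyYAML is available:

```python
import yaml

snippet = """
post_training:
- provider_id: torchtune-cpu
  provider_type: inline::torchtune-cpu
  config:
    checkpoint_format: meta
"""
provider = yaml.safe_load(snippet)["post_training"][0]
assert provider["provider_type"] == "inline::torchtune-cpu"
assert provider["config"] == {"checkpoint_format": "meta"}
```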


```diff
@@ -35,7 +35,7 @@ distribution_spec:
     telemetry:
     - provider_type: inline::meta-reference
     post_training:
-    - provider_type: inline::torchtune-gpu
+    - provider_type: inline::huggingface-gpu
     eval:
     - provider_type: inline::meta-reference
     datasetio:
```


```diff
@@ -156,10 +156,13 @@ providers:
       sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/trace_store.db
       otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   post_training:
-  - provider_id: torchtune-gpu
-    provider_type: inline::torchtune-gpu
+  - provider_id: huggingface-gpu
+    provider_type: inline::huggingface-gpu
     config:
-      checkpoint_format: meta
+      checkpoint_format: huggingface
+      distributed_backend: null
+      device: cpu
+      dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
```


```diff
@@ -17,6 +17,6 @@ def get_distribution_template() -> DistributionTemplate:
     template.description = "Quick start template for running Llama Stack with several popular providers. This distribution is intended for GPU-enabled environments."
     template.providers["post_training"] = [
-        BuildProvider(provider_type="inline::torchtune-gpu"),
+        BuildProvider(provider_type="inline::huggingface-gpu"),
     ]
 
     return template
```
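
For context, this template derives from the starter template and only overrides the post-training provider. A hypothetical reconstruction of the file after this change; the import paths are assumptions, not confirmed by the diff:

```python
# Hypothetical sketch of starter_gpu.py after this commit; import locations
# are assumed from the surrounding code, not shown in the hunk above.
from llama_stack.distributions.starter.starter import (
    get_distribution_template as get_starter_template,
)
from llama_stack.providers.datatypes import BuildProvider


def get_distribution_template():
    template = get_starter_template()
    template.description = (
        "Quick start template for running Llama Stack with several popular providers. "
        "This distribution is intended for GPU-enabled environments."
    )
    template.providers["post_training"] = [
        BuildProvider(provider_type="inline::huggingface-gpu"),
    ]

    return template
```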


```diff
@@ -35,7 +35,7 @@ distribution_spec:
     telemetry:
     - provider_type: inline::meta-reference
     post_training:
-    - provider_type: inline::huggingface-cpu
+    - provider_type: inline::torchtune-cpu
     eval:
     - provider_type: inline::meta-reference
     datasetio:
```


```diff
@@ -156,13 +156,10 @@ providers:
       sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db
       otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   post_training:
-  - provider_id: huggingface-cpu
-    provider_type: inline::huggingface-cpu
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
     config:
-      checkpoint_format: huggingface
-      distributed_backend: null
-      device: cpu
-      dpo_output_dir: ~/.llama/distributions/starter/dpo_output
+      checkpoint_format: meta
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
```


```diff
@@ -120,7 +120,7 @@ def get_distribution_template() -> DistributionTemplate:
         ],
         "agents": [BuildProvider(provider_type="inline::meta-reference")],
         "telemetry": [BuildProvider(provider_type="inline::meta-reference")],
-        "post_training": [BuildProvider(provider_type="inline::huggingface-cpu")],
+        "post_training": [BuildProvider(provider_type="inline::torchtune-cpu")],
         "eval": [BuildProvider(provider_type="inline::meta-reference")],
         "datasetio": [
             BuildProvider(provider_type="remote::huggingface"),
```


```diff
@@ -40,8 +40,9 @@ def available_providers() -> list[ProviderSpec]:
         InlineProviderSpec(
             api=Api.inference,
             provider_type="inline::sentence-transformers",
+            # CrossEncoder depends on torchao.quantization
             pip_packages=[
-                "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
+                "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
                 "sentence-transformers --no-deps",
             ],
             module="llama_stack.providers.inline.inference.sentence_transformers",
```


```diff
@@ -13,7 +13,7 @@ from llama_stack.providers.datatypes import AdapterSpec, Api, InlineProviderSpec
 # The CPU version is used for distributions that don't have GPU support -- they result in smaller container images.
 torchtune_def = dict(
     api=Api.post_training,
-    pip_packages=["torchtune==0.5.0", "torchao==0.8.0", "numpy"],
+    pip_packages=["numpy"],
     module="llama_stack.providers.inline.post_training.torchtune",
     config_class="llama_stack.providers.inline.post_training.torchtune.TorchtunePostTrainingConfig",
     api_dependencies=[
@@ -23,56 +23,39 @@ torchtune_def = dict(
     description="TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.",
 )
 
-huggingface_def = dict(
-    api=Api.post_training,
-    pip_packages=["trl", "transformers", "peft", "datasets"],
-    module="llama_stack.providers.inline.post_training.huggingface",
-    config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
-    api_dependencies=[
-        Api.datasetio,
-        Api.datasets,
-    ],
-    description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
-)
-
 
 def available_providers() -> list[ProviderSpec]:
     return [
         InlineProviderSpec(
-            **{
+            **{  # type: ignore
                 **torchtune_def,
                 "provider_type": "inline::torchtune-cpu",
                 "pip_packages": (
                     cast(list[str], torchtune_def["pip_packages"])
-                    + ["torch torchtune==0.5.0 torchao==0.8.0 --index-url https://download.pytorch.org/whl/cpu"]
+                    + ["torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"]
                 ),
             },
         ),
         InlineProviderSpec(
-            **{
-                **huggingface_def,
-                "provider_type": "inline::huggingface-cpu",
-                "pip_packages": (
-                    cast(list[str], huggingface_def["pip_packages"])
-                    + ["torch --index-url https://download.pytorch.org/whl/cpu"]
-                ),
-            },
-        ),
-        InlineProviderSpec(
-            **{
+            **{  # type: ignore
                 **torchtune_def,
                 "provider_type": "inline::torchtune-gpu",
                 "pip_packages": (
-                    cast(list[str], torchtune_def["pip_packages"]) + ["torch torchtune==0.5.0 torchao==0.8.0"]
+                    cast(list[str], torchtune_def["pip_packages"]) + ["torch torchtune>=0.5.0 torchao>=0.12.0"]
                 ),
             },
         ),
         InlineProviderSpec(
-            **{
-                **huggingface_def,
-                "provider_type": "inline::huggingface-gpu",
-                "pip_packages": (cast(list[str], huggingface_def["pip_packages"]) + ["torch"]),
-            },
+            api=Api.post_training,
+            provider_type="inline::huggingface-gpu",
+            pip_packages=["trl", "transformers", "peft", "datasets", "torch"],
+            module="llama_stack.providers.inline.post_training.huggingface",
+            config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
+            api_dependencies=[
+                Api.datasetio,
+                Api.datasets,
+            ],
+            description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
         ),
         remote_provider_spec(
             api=Api.post_training,
```
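
`get_provider_dependencies` (see the hunk above) routes any line containing one of the pip flags into `special_deps`, and each such line then needs its own pip invocation rather than a slot in a plain requirements list. A hedged sketch of that translation; the actual build scripts may do this differently:

```python
import shlex
import subprocess
import sys

# One "special" entry from the registry above: requirement specs plus a pip flag.
special_dep = "torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"

# shlex.split yields the three requirement specs followed by
# ["--extra-index-url", "https://download.pytorch.org/whl/cpu"].
subprocess.check_call([sys.executable, "-m", "pip", "install", *shlex.split(special_dep)])
```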