diff --git a/docs/source/providers/post_training/index.md b/docs/source/providers/post_training/index.md
index 5ada6f9aa..e69f2a45a 100644
--- a/docs/source/providers/post_training/index.md
+++ b/docs/source/providers/post_training/index.md
@@ -9,7 +9,6 @@ This section contains documentation for all available providers for the **post_t
 ```{toctree}
 :maxdepth: 1

-inline_huggingface-cpu
 inline_huggingface-gpu
 inline_torchtune-cpu
 inline_torchtune-gpu
diff --git a/llama_stack/core/build.py b/llama_stack/core/build.py
index fa1fe632b..2ceb9e9be 100644
--- a/llama_stack/core/build.py
+++ b/llama_stack/core/build.py
@@ -80,7 +80,7 @@ def get_provider_dependencies(
     normal_deps = []
     special_deps = []
     for package in deps:
-        if "--no-deps" in package or "--index-url" in package:
+        if any(f in package for f in ["--no-deps", "--index-url", "--extra-index-url"]):
             special_deps.append(package)
         else:
             normal_deps.append(package)
diff --git a/llama_stack/distributions/ci-tests/build.yaml b/llama_stack/distributions/ci-tests/build.yaml
index b4701cb81..8e6c0bf67 100644
--- a/llama_stack/distributions/ci-tests/build.yaml
+++ b/llama_stack/distributions/ci-tests/build.yaml
@@ -34,7 +34,7 @@ distribution_spec:
     telemetry:
     - provider_type: inline::meta-reference
     post_training:
-    - provider_type: inline::huggingface-cpu
+    - provider_type: inline::torchtune-cpu
     eval:
     - provider_type: inline::meta-reference
     datasetio:
diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml
index 3acdd20f9..7523df581 100644
--- a/llama_stack/distributions/ci-tests/run.yaml
+++ b/llama_stack/distributions/ci-tests/run.yaml
@@ -156,13 +156,10 @@ providers:
       sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db
       otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   post_training:
-  - provider_id: huggingface-cpu
-    provider_type: inline::huggingface-cpu
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
     config:
-      checkpoint_format: huggingface
-      distributed_backend: null
-      device: cpu
-      dpo_output_dir: ~/.llama/distributions/ci-tests/dpo_output
+      checkpoint_format: meta
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
diff --git a/llama_stack/distributions/starter-gpu/build.yaml b/llama_stack/distributions/starter-gpu/build.yaml
index ae0680cdc..ff7c58e6f 100644
--- a/llama_stack/distributions/starter-gpu/build.yaml
+++ b/llama_stack/distributions/starter-gpu/build.yaml
@@ -35,7 +35,7 @@ distribution_spec:
     telemetry:
     - provider_type: inline::meta-reference
     post_training:
-    - provider_type: inline::torchtune-gpu
+    - provider_type: inline::huggingface-gpu
     eval:
     - provider_type: inline::meta-reference
     datasetio:
diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml
index 81c802317..8aed61519 100644
--- a/llama_stack/distributions/starter-gpu/run.yaml
+++ b/llama_stack/distributions/starter-gpu/run.yaml
@@ -156,10 +156,13 @@ providers:
       sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/trace_store.db
       otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   post_training:
-  - provider_id: torchtune-gpu
-    provider_type: inline::torchtune-gpu
+  - provider_id: huggingface-gpu
+    provider_type: inline::huggingface-gpu
     config:
-      checkpoint_format: meta
+      checkpoint_format: huggingface
+      distributed_backend: null
+      device: cpu
+      dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
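Note: the get_provider_dependencies change in llama_stack/core/build.py above widens the set of requirement strings that are routed into special_deps instead of the normal dependency list. A standalone sketch of that filter follows; the split_deps helper name is illustrative, not part of the codebase:

    SPECIAL_FLAGS = ["--no-deps", "--index-url", "--extra-index-url"]

    def split_deps(deps: list[str]) -> tuple[list[str], list[str]]:
        """Separate plain requirements from lines carrying pip flags."""
        normal_deps: list[str] = []
        special_deps: list[str] = []
        for package in deps:
            # A pip flag anywhere in the line means it must reach pip verbatim.
            if any(f in package for f in SPECIAL_FLAGS):
                special_deps.append(package)
            else:
                normal_deps.append(package)
        return normal_deps, special_deps

    normal, special = split_deps([
        "numpy",
        "sentence-transformers --no-deps",
        "torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
    ])
    assert normal == ["numpy"] and len(special) == 2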
diff --git a/llama_stack/distributions/starter-gpu/starter_gpu.py b/llama_stack/distributions/starter-gpu/starter_gpu.py
index 893df6c17..245334749 100644
--- a/llama_stack/distributions/starter-gpu/starter_gpu.py
+++ b/llama_stack/distributions/starter-gpu/starter_gpu.py
@@ -17,6 +17,6 @@ def get_distribution_template() -> DistributionTemplate:
     template.description = "Quick start template for running Llama Stack with several popular providers. This distribution is intended for GPU-enabled environments."

     template.providers["post_training"] = [
-        BuildProvider(provider_type="inline::torchtune-gpu"),
+        BuildProvider(provider_type="inline::huggingface-gpu"),
     ]
     return template
diff --git a/llama_stack/distributions/starter/build.yaml b/llama_stack/distributions/starter/build.yaml
index 3df0eb129..e84e528da 100644
--- a/llama_stack/distributions/starter/build.yaml
+++ b/llama_stack/distributions/starter/build.yaml
@@ -35,7 +35,7 @@ distribution_spec:
     telemetry:
     - provider_type: inline::meta-reference
     post_training:
-    - provider_type: inline::huggingface-cpu
+    - provider_type: inline::torchtune-cpu
     eval:
     - provider_type: inline::meta-reference
     datasetio:
diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml
index 7e1d46a61..a3962b8aa 100644
--- a/llama_stack/distributions/starter/run.yaml
+++ b/llama_stack/distributions/starter/run.yaml
@@ -156,13 +156,10 @@ providers:
       sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db
      otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   post_training:
-  - provider_id: huggingface-cpu
-    provider_type: inline::huggingface-cpu
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
     config:
-      checkpoint_format: huggingface
-      distributed_backend: null
-      device: cpu
-      dpo_output_dir: ~/.llama/distributions/starter/dpo_output
+      checkpoint_format: meta
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
diff --git a/llama_stack/distributions/starter/starter.py b/llama_stack/distributions/starter/starter.py
index f49da0bb7..a4bbc6371 100644
--- a/llama_stack/distributions/starter/starter.py
+++ b/llama_stack/distributions/starter/starter.py
@@ -120,7 +120,7 @@ def get_distribution_template() -> DistributionTemplate:
         ],
         "agents": [BuildProvider(provider_type="inline::meta-reference")],
         "telemetry": [BuildProvider(provider_type="inline::meta-reference")],
-        "post_training": [BuildProvider(provider_type="inline::huggingface-cpu")],
+        "post_training": [BuildProvider(provider_type="inline::torchtune-cpu")],
         "eval": [BuildProvider(provider_type="inline::meta-reference")],
         "datasetio": [
             BuildProvider(provider_type="remote::huggingface"),
diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py
index 1801cdcad..82b771a28 100644
--- a/llama_stack/providers/registry/inference.py
+++ b/llama_stack/providers/registry/inference.py
@@ -40,8 +40,9 @@ def available_providers() -> list[ProviderSpec]:
         InlineProviderSpec(
             api=Api.inference,
             provider_type="inline::sentence-transformers",
+            # CrossEncoder depends on torchao.quantization
             pip_packages=[
-                "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
+                "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
                 "sentence-transformers --no-deps",
             ],
             module="llama_stack.providers.inline.inference.sentence_transformers",
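Note: --index-url replaces PyPI as pip's package index, while --extra-index-url adds the PyTorch CPU index alongside PyPI, so packages absent from the CPU index can still resolve from PyPI. Assuming these special dependency lines are handed to pip verbatim by the build scripts, the CPU torchtune entry below expands roughly as in this sketch; install_special_dep is a hypothetical helper, not llama-stack code:

    import shlex
    import subprocess
    import sys

    def install_special_dep(dep_line: str) -> None:
        # Pass the whole line, flags included, straight through to pip.
        cmd = [sys.executable, "-m", "pip", "install", *shlex.split(dep_line)]
        subprocess.run(cmd, check=True)

    # Roughly equivalent to:
    #   pip install torch torchtune>=0.5.0 torchao>=0.12.0 \
    #       --extra-index-url https://download.pytorch.org/whl/cpu
    install_special_dep(
        "torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"
    )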
diff --git a/llama_stack/providers/registry/post_training.py b/llama_stack/providers/registry/post_training.py
index 4443f4df1..67238e3fc 100644
--- a/llama_stack/providers/registry/post_training.py
+++ b/llama_stack/providers/registry/post_training.py
@@ -13,7 +13,7 @@ from llama_stack.providers.datatypes import AdapterSpec, Api, InlineProviderSpec
 # The CPU version is used for distributions that don't have GPU support -- they result in smaller container images.
 torchtune_def = dict(
     api=Api.post_training,
-    pip_packages=["torchtune==0.5.0", "torchao==0.8.0", "numpy"],
+    pip_packages=["numpy"],
     module="llama_stack.providers.inline.post_training.torchtune",
     config_class="llama_stack.providers.inline.post_training.torchtune.TorchtunePostTrainingConfig",
     api_dependencies=[
@@ -23,56 +23,39 @@ torchtune_def = dict(
     description="TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.",
 )

-huggingface_def = dict(
-    api=Api.post_training,
-    pip_packages=["trl", "transformers", "peft", "datasets"],
-    module="llama_stack.providers.inline.post_training.huggingface",
-    config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
-    api_dependencies=[
-        Api.datasetio,
-        Api.datasets,
-    ],
-    description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
-)
-

 def available_providers() -> list[ProviderSpec]:
     return [
         InlineProviderSpec(
-            **{
+            **{  # type: ignore
                 **torchtune_def,
                 "provider_type": "inline::torchtune-cpu",
                 "pip_packages": (
                     cast(list[str], torchtune_def["pip_packages"])
-                    + ["torch torchtune==0.5.0 torchao==0.8.0 --index-url https://download.pytorch.org/whl/cpu"]
+                    + ["torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"]
                 ),
             },
         ),
         InlineProviderSpec(
-            **{
-                **huggingface_def,
-                "provider_type": "inline::huggingface-cpu",
-                "pip_packages": (
-                    cast(list[str], huggingface_def["pip_packages"])
-                    + ["torch --index-url https://download.pytorch.org/whl/cpu"]
-                ),
-            },
-        ),
-        InlineProviderSpec(
-            **{
+            **{  # type: ignore
                 **torchtune_def,
                 "provider_type": "inline::torchtune-gpu",
                 "pip_packages": (
-                    cast(list[str], torchtune_def["pip_packages"]) + ["torch torchtune==0.5.0 torchao==0.8.0"]
+                    cast(list[str], torchtune_def["pip_packages"]) + ["torch torchtune>=0.5.0 torchao>=0.12.0"]
                 ),
             },
         ),
         InlineProviderSpec(
-            **{
-                **huggingface_def,
-                "provider_type": "inline::huggingface-gpu",
-                "pip_packages": (cast(list[str], huggingface_def["pip_packages"]) + ["torch"]),
-            },
+            api=Api.post_training,
+            provider_type="inline::huggingface-gpu",
+            pip_packages=["trl", "transformers", "peft", "datasets", "torch"],
+            module="llama_stack.providers.inline.post_training.huggingface",
+            config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
+            api_dependencies=[
+                Api.datasetio,
+                Api.datasets,
+            ],
+            description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
         ),
         remote_provider_spec(
             api=Api.post_training,
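Note: the registry hunk above keeps a single torchtune_def base and derives the CPU and GPU variants by dict spreading with per-variant overrides, with # type: ignore silencing mypy on the heterogeneous **{...} unpacking. A minimal sketch of that pattern under illustrative names; base_def and variants are not the real ProviderSpec datatypes:

    from typing import cast

    # Shared base definition; each variant overrides provider_type and
    # extends pip_packages.
    base_def = dict(
        provider_type="",
        pip_packages=["numpy"],
    )

    variants = [
        {
            **base_def,
            "provider_type": "inline::torchtune-cpu",
            "pip_packages": cast(list[str], base_def["pip_packages"])
            + ["torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"],
        },
        {
            **base_def,
            "provider_type": "inline::torchtune-gpu",
            "pip_packages": cast(list[str], base_def["pip_packages"])
            + ["torch torchtune>=0.5.0 torchao>=0.12.0"],
        },
    ]

    for v in variants:
        print(v["provider_type"], "->", v["pip_packages"])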