Merge branch 'main' into HuggingfacePostTrainingConfig-branch

commit 66f4af7fec
Sarthak Deshpande, 2025-08-28 14:53:07 +05:30, committed by GitHub
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
31 changed files with 118 additions and 187 deletions


@@ -80,7 +80,7 @@ def get_provider_dependencies(
     normal_deps = []
     special_deps = []
     for package in deps:
-        if "--no-deps" in package or "--index-url" in package:
+        if any(f in package for f in ["--no-deps", "--index-url", "--extra-index-url"]):
            special_deps.append(package)
        else:
            normal_deps.append(package)
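
The widened check is what routes flag-carrying requirement lines through the special path, so each keeps its pip flags and can later be installed with its own pip invocation. A minimal standalone sketch of the split (the helper name and sample inputs are illustrative, not the repo's API):

# Split dependency strings that carry pip flags ("special" deps) from
# plain requirements, mirroring the hunk above.
SPECIAL_FLAGS = ["--no-deps", "--index-url", "--extra-index-url"]

def split_deps(deps: list[str]) -> tuple[list[str], list[str]]:
    normal: list[str] = []
    special: list[str] = []
    for package in deps:
        # A line like "torch ... --extra-index-url https://..." must stay
        # intact so the flag applies only to that install.
        if any(flag in package for flag in SPECIAL_FLAGS):
            special.append(package)
        else:
            normal.append(package)
    return normal, special

normal, special = split_deps([
    "numpy",
    "sentence-transformers --no-deps",
    "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
])
assert normal == ["numpy"] and len(special) == 2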


@@ -34,7 +34,7 @@ distribution_spec:
     telemetry:
     - provider_type: inline::meta-reference
     post_training:
-    - provider_type: inline::huggingface-cpu
+    - provider_type: inline::torchtune-cpu
     eval:
     - provider_type: inline::meta-reference
     datasetio:


@@ -156,13 +156,10 @@ providers:
       sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db
       otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   post_training:
-  - provider_id: huggingface-cpu
-    provider_type: inline::huggingface-cpu
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
     config:
-      checkpoint_format: huggingface
-      distributed_backend: null
-      device: cpu
-      dpo_output_dir: ~/.llama/distributions/ci-tests/dpo_output
+      checkpoint_format: meta
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference


@@ -1,7 +1,7 @@
 ---
 orphan: true
 ---
-# Meta Reference Distribution
+# Meta Reference GPU Distribution
 
 ```{toctree}
 :maxdepth: 2
@@ -29,7 +29,7 @@ The following environment variables can be configured:
 
 ## Prerequisite: Downloading Models
 
-Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](../../references/llama_cli_reference/download_models.md) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
 
 ```
 $ llama model list --downloaded


@@ -35,7 +35,7 @@ distribution_spec:
     telemetry:
     - provider_type: inline::meta-reference
     post_training:
-    - provider_type: inline::torchtune-gpu
+    - provider_type: inline::huggingface-gpu
     eval:
     - provider_type: inline::meta-reference
     datasetio:


@@ -156,10 +156,13 @@ providers:
       sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/trace_store.db
       otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   post_training:
-  - provider_id: torchtune-gpu
-    provider_type: inline::torchtune-gpu
+  - provider_id: huggingface-gpu
+    provider_type: inline::huggingface-gpu
     config:
-      checkpoint_format: meta
+      checkpoint_format: huggingface
+      distributed_backend: null
+      device: cpu
+      dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference


@@ -17,6 +17,6 @@ def get_distribution_template() -> DistributionTemplate:
     template.description = "Quick start template for running Llama Stack with several popular providers. This distribution is intended for GPU-enabled environments."
 
     template.providers["post_training"] = [
-        BuildProvider(provider_type="inline::torchtune-gpu"),
+        BuildProvider(provider_type="inline::huggingface-gpu"),
     ]
     return template
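
The GPU starter is built by taking the base template and replacing just the post_training provider list. A toy, self-contained version of that override pattern (the dataclasses stand in for the real BuildProvider and DistributionTemplate types, and base_template() stands in for the base starter template; none of these definitions are the repo's actual code):

from dataclasses import dataclass, field

@dataclass
class BuildProvider:
    provider_type: str

@dataclass
class DistributionTemplate:
    description: str = ""
    providers: dict[str, list[BuildProvider]] = field(default_factory=dict)

def base_template() -> DistributionTemplate:
    # Stand-in for the CPU starter template this file builds on.
    return DistributionTemplate(
        description="Quick start template",
        providers={"post_training": [BuildProvider("inline::torchtune-cpu")]},
    )

def get_distribution_template() -> DistributionTemplate:
    template = base_template()
    template.description = "GPU-enabled variant of the starter distribution."
    template.providers["post_training"] = [BuildProvider(provider_type="inline::huggingface-gpu")]
    return template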


@@ -35,7 +35,7 @@ distribution_spec:
     telemetry:
     - provider_type: inline::meta-reference
     post_training:
-    - provider_type: inline::huggingface-cpu
+    - provider_type: inline::torchtune-cpu
     eval:
     - provider_type: inline::meta-reference
     datasetio:


@@ -156,13 +156,10 @@ providers:
       sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db
       otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   post_training:
-  - provider_id: huggingface-cpu
-    provider_type: inline::huggingface-cpu
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
     config:
-      checkpoint_format: huggingface
-      distributed_backend: null
-      device: cpu
-      dpo_output_dir: ~/.llama/distributions/starter/dpo_output
+      checkpoint_format: meta
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference


@@ -120,7 +120,7 @@ def get_distribution_template() -> DistributionTemplate:
         ],
         "agents": [BuildProvider(provider_type="inline::meta-reference")],
         "telemetry": [BuildProvider(provider_type="inline::meta-reference")],
-        "post_training": [BuildProvider(provider_type="inline::huggingface-cpu")],
+        "post_training": [BuildProvider(provider_type="inline::torchtune-cpu")],
         "eval": [BuildProvider(provider_type="inline::meta-reference")],
         "datasetio": [
             BuildProvider(provider_type="remote::huggingface"),


@@ -40,8 +40,9 @@ def available_providers() -> list[ProviderSpec]:
         InlineProviderSpec(
             api=Api.inference,
             provider_type="inline::sentence-transformers",
+            # CrossEncoder depends on torchao.quantization
             pip_packages=[
-                "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
+                "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
                 "sentence-transformers --no-deps",
             ],
             module="llama_stack.providers.inline.inference.sentence_transformers",


@@ -13,7 +13,7 @@ from llama_stack.providers.datatypes import AdapterSpec, Api, InlineProviderSpec
 # The CPU version is used for distributions that don't have GPU support -- they result in smaller container images.
 torchtune_def = dict(
     api=Api.post_training,
-    pip_packages=["torchtune==0.5.0", "torchao==0.8.0", "numpy"],
+    pip_packages=["numpy"],
     module="llama_stack.providers.inline.post_training.torchtune",
     config_class="llama_stack.providers.inline.post_training.torchtune.TorchtunePostTrainingConfig",
     api_dependencies=[
@@ -23,56 +23,39 @@ torchtune_def = dict(
     description="TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.",
 )
 
 
-huggingface_def = dict(
-    api=Api.post_training,
-    pip_packages=["trl", "transformers", "peft", "datasets"],
-    module="llama_stack.providers.inline.post_training.huggingface",
-    config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
-    api_dependencies=[
-        Api.datasetio,
-        Api.datasets,
-    ],
-    description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
-)
-
 def available_providers() -> list[ProviderSpec]:
     return [
         InlineProviderSpec(
-            **{
+            **{  # type: ignore
                 **torchtune_def,
                 "provider_type": "inline::torchtune-cpu",
                 "pip_packages": (
                     cast(list[str], torchtune_def["pip_packages"])
-                    + ["torch torchtune==0.5.0 torchao==0.8.0 --index-url https://download.pytorch.org/whl/cpu"]
+                    + ["torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"]
                 ),
             },
         ),
-        InlineProviderSpec(
-            **{
-                **huggingface_def,
-                "provider_type": "inline::huggingface-cpu",
-                "pip_packages": (
-                    cast(list[str], huggingface_def["pip_packages"])
-                    + ["torch --index-url https://download.pytorch.org/whl/cpu"]
-                ),
-            },
-        ),
         InlineProviderSpec(
-            **{
+            **{  # type: ignore
                 **torchtune_def,
                 "provider_type": "inline::torchtune-gpu",
                 "pip_packages": (
-                    cast(list[str], torchtune_def["pip_packages"]) + ["torch torchtune==0.5.0 torchao==0.8.0"]
+                    cast(list[str], torchtune_def["pip_packages"]) + ["torch torchtune>=0.5.0 torchao>=0.12.0"]
                ),
            },
        ),
        InlineProviderSpec(
-            **{
-                **huggingface_def,
-                "provider_type": "inline::huggingface-gpu",
-                "pip_packages": (cast(list[str], huggingface_def["pip_packages"]) + ["torch"]),
-            },
+            api=Api.post_training,
+            provider_type="inline::huggingface-gpu",
+            pip_packages=["trl", "transformers", "peft", "datasets", "torch"],
+            module="llama_stack.providers.inline.post_training.huggingface",
+            config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
+            api_dependencies=[
+                Api.datasetio,
+                Api.datasets,
+            ],
+            description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
        ),
        remote_provider_spec(
            api=Api.post_training,
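
The surviving torchtune specs lean on Python's dict-unpacking semantics: in `**{**torchtune_def, "provider_type": ...}`, later keys override earlier ones, so each variant shares the base definition and swaps in only its provider_type and pip_packages (the added `# type: ignore` quiets mypy, which cannot type a heterogeneous dict unpacked into keyword arguments). A toy reduction of the pattern:

# Later keys win when a dict is spread, which is how the CPU and GPU
# variants specialize a shared base definition.
base = {
    "pip_packages": ["numpy"],
    "description": "TorchTune-based post-training provider",
}

cpu = {
    **base,
    "provider_type": "inline::torchtune-cpu",
    "pip_packages": base["pip_packages"]
    + ["torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"],
}
gpu = {
    **base,
    "provider_type": "inline::torchtune-gpu",
    "pip_packages": base["pip_packages"] + ["torch torchtune>=0.5.0 torchao>=0.12.0"],
}

assert cpu["description"] == gpu["description"]  # shared base survives
assert cpu["pip_packages"] != gpu["pip_packages"]  # override wins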


@@ -9,7 +9,6 @@ from __future__ import annotations  # for forward references
 import hashlib
 import json
 import os
-import sqlite3
 from collections.abc import Generator
 from contextlib import contextmanager
 from enum import StrEnum
@@ -125,28 +124,13 @@ class ResponseStorage:
     def __init__(self, test_dir: Path):
         self.test_dir = test_dir
         self.responses_dir = self.test_dir / "responses"
-        self.db_path = self.test_dir / "index.sqlite"
 
         self._ensure_directories()
-        self._init_database()
 
     def _ensure_directories(self):
         self.test_dir.mkdir(parents=True, exist_ok=True)
         self.responses_dir.mkdir(exist_ok=True)
 
-    def _init_database(self):
-        with sqlite3.connect(self.db_path) as conn:
-            conn.execute("""
-                CREATE TABLE IF NOT EXISTS recordings (
-                    request_hash TEXT PRIMARY KEY,
-                    response_file TEXT,
-                    endpoint TEXT,
-                    model TEXT,
-                    timestamp TEXT,
-                    is_streaming BOOLEAN
-                )
-            """)
-
     def store_recording(self, request_hash: str, request: dict[str, Any], response: dict[str, Any]):
         """Store a request/response pair."""
         # Generate unique response filename
@@ -169,34 +153,9 @@ class ResponseStorage:
             f.write("\n")
             f.flush()
 
-        # Update SQLite index
-        with sqlite3.connect(self.db_path) as conn:
-            conn.execute(
-                """
-                INSERT OR REPLACE INTO recordings
-                (request_hash, response_file, endpoint, model, timestamp, is_streaming)
-                VALUES (?, ?, ?, ?, datetime('now'), ?)
-                """,
-                (
-                    request_hash,
-                    response_file,
-                    request.get("endpoint", ""),
-                    request.get("model", ""),
-                    response.get("is_streaming", False),
-                ),
-            )
-
     def find_recording(self, request_hash: str) -> dict[str, Any] | None:
         """Find a recorded response by request hash."""
-        with sqlite3.connect(self.db_path) as conn:
-            result = conn.execute(
-                "SELECT response_file FROM recordings WHERE request_hash = ?", (request_hash,)
-            ).fetchone()
-            if not result:
-                return None
-
-            response_file = result[0]
-
+        response_file = f"{request_hash[:12]}.json"
         response_path = self.responses_dir / response_file
 
         if not response_path.exists():
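
With the SQLite index gone, the response filename is a pure function of the request hash, so lookup reduces to a path check instead of a SELECT. A self-contained sketch of the content-addressed scheme (only the f"{request_hash[:12]}.json" naming comes from the diff; the exact request-hashing shown is an assumption):

import hashlib
import json
from pathlib import Path

def request_hash(request: dict) -> str:
    # Assumed canonicalization: hash the sorted-key JSON of the request.
    canonical = json.dumps(request, sort_keys=True)
    return hashlib.sha256(canonical.encode()).hexdigest()

def find_recording(responses_dir: Path, req_hash: str) -> dict | None:
    # The filename is derived from the hash, as in the new code above.
    response_path = responses_dir / f"{req_hash[:12]}.json"
    if not response_path.exists():
        return None
    return json.loads(response_path.read_text())

The trade-off: metadata queries (by endpoint or model) lose their index, but the recording store no longer needs schema management or a second source of truth.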


@@ -18,7 +18,7 @@
         "class-variance-authority": "^0.7.1",
         "clsx": "^2.1.1",
         "framer-motion": "^11.18.2",
-        "llama-stack-client": "^0.2.18",
+        "llama-stack-client": "^0.2.19",
         "lucide-react": "^0.510.0",
         "next": "15.3.3",
         "next-auth": "^4.24.11",
@@ -36,7 +36,7 @@
         "@eslint/eslintrc": "^3",
         "@tailwindcss/postcss": "^4",
         "@testing-library/dom": "^10.4.1",
-        "@testing-library/jest-dom": "^6.6.3",
+        "@testing-library/jest-dom": "^6.8.0",
         "@testing-library/react": "^16.3.0",
         "@types/jest": "^29.5.14",
         "@types/node": "^20",
@@ -3597,18 +3597,17 @@
       }
     },
     "node_modules/@testing-library/jest-dom": {
-      "version": "6.6.3",
-      "resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.6.3.tgz",
-      "integrity": "sha512-IteBhl4XqYNkM54f4ejhLRJiZNqcSCoXUOG2CPK7qbD322KjQozM4kHQOfkG2oln9b9HTYqs+Sae8vBATubxxA==",
+      "version": "6.8.0",
+      "resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.8.0.tgz",
+      "integrity": "sha512-WgXcWzVM6idy5JaftTVC8Vs83NKRmGJz4Hqs4oyOuO2J4r/y79vvKZsb+CaGyCSEbUPI6OsewfPd0G1A0/TUZQ==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
         "@adobe/css-tools": "^4.4.0",
         "aria-query": "^5.0.0",
-        "chalk": "^3.0.0",
         "css.escape": "^1.5.1",
         "dom-accessibility-api": "^0.6.3",
-        "lodash": "^4.17.21",
+        "picocolors": "^1.1.1",
         "redent": "^3.0.0"
       },
       "engines": {
@@ -3617,20 +3616,6 @@
         "yarn": ">=1"
       }
     },
-    "node_modules/@testing-library/jest-dom/node_modules/chalk": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/chalk/-/chalk-3.0.0.tgz",
-      "integrity": "sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "ansi-styles": "^4.1.0",
-        "supports-color": "^7.1.0"
-      },
-      "engines": {
-        "node": ">=8"
-      }
-    },
     "node_modules/@testing-library/jest-dom/node_modules/dom-accessibility-api": {
       "version": "0.6.3",
       "resolved": "https://registry.npmjs.org/dom-accessibility-api/-/dom-accessibility-api-0.6.3.tgz",
@@ -10021,9 +10006,9 @@
       "license": "MIT"
     },
     "node_modules/llama-stack-client": {
-      "version": "0.2.18",
-      "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.18.tgz",
-      "integrity": "sha512-k+xQOz/TIU0cINP4Aih8q6xs7f/6qs0fLDMXTTKQr5C0F1jtCjRiwsas7bTsDfpKfYhg/7Xy/wPw/uZgi6aIVg==",
+      "version": "0.2.19",
+      "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.19.tgz",
+      "integrity": "sha512-sDuAhUdEGlERZ3jlMUzPXcQTgMv/pGbDrPX0ifbE5S+gr7Q+7ohuQYrIXe+hXgIipFjq+y4b2c5laZ76tmAyEA==",
       "license": "MIT",
       "dependencies": {
         "@types/node": "^18.11.18",
@@ -10066,13 +10051,6 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
-    "node_modules/lodash": {
-      "version": "4.17.21",
-      "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
-      "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==",
-      "dev": true,
-      "license": "MIT"
-    },
     "node_modules/lodash.merge": {
       "version": "4.6.2",
       "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz",


@@ -23,7 +23,7 @@
     "class-variance-authority": "^0.7.1",
     "clsx": "^2.1.1",
     "framer-motion": "^11.18.2",
-    "llama-stack-client": "^0.2.18",
+    "llama-stack-client": "^0.2.19",
     "lucide-react": "^0.510.0",
     "next": "15.3.3",
     "next-auth": "^4.24.11",
@@ -41,7 +41,7 @@
     "@eslint/eslintrc": "^3",
     "@tailwindcss/postcss": "^4",
     "@testing-library/dom": "^10.4.1",
-    "@testing-library/jest-dom": "^6.6.3",
+    "@testing-library/jest-dom": "^6.8.0",
     "@testing-library/react": "^16.3.0",
     "@types/jest": "^29.5.14",
     "@types/node": "^20",