Introduce Llama stack distributions (#22)

* Add distribution CLI scaffolding

* More progress towards `llama distribution install`

* getting closer to a distro definition, distro install + configure works

* Distribution server now functioning

* read existing configuration, save enums properly

* Remove inference uvicorn server entrypoint and llama inference CLI command

* updated dependency and client model name

* Improved exception handling

* local imports for faster cli

* undo a typo, add a passthrough distribution

* implement full-passthrough in the server

* add safety adapters, configuration handling, server + clients

* cleanup, moving stuff to common, nuke utils

* Add a Path() wrapper at the earliest place

* fixes

* Bring agentic system api to toolchain

Add adapter dependencies and resolve adapters using a topological sort

* refactor to reduce size of `agentic_system`

* move straggler files and fix some important existing bugs

* ApiSurface -> Api

* refactor a method out

* Adapter -> Provider

* Make each inference provider into its own subdirectory

* installation fixes

* Rename Distribution -> DistributionSpec, simplify RemoteProviders

* dict key instead of attr

* update inference config to take model and not model_dir

* Fix passthrough streaming, send headers properly not part of body :facepalm

* update safety to use model sku ids and not model dirs

* Update cli_reference.md

* minor fixes

* add DistributionConfig, fix a bug in model download

* Make install + start scripts do proper configuration automatically

* Update CLI_reference

* Nuke fp8_requirements, fold fbgemm into common requirements

* Update README, add newline between API surface configurations

* Refactor download functionality out of the Command so can be reused

* Add `llama model download` alias for `llama download`

* Show message about checksum file so users can check themselves

* Simpler intro statements

* get ollama working

* Reduce a bunch of dependencies from toolchain

Some improvements to the distribution install script

* Avoid using `conda run` since it buffers everything

* update dependencies and rely on LLAMA_TOOLCHAIN_DIR for dev purposes

* add validation for configuration input

* resort imports

* make optional subclasses default to yes for configuration

* Remove additional_pip_packages; move deps to providers

* for inline make 8b model the default

* Add scripts to MANIFEST

* allow installing from test.pypi.org

* Fix #2 to help with testing packages

* Must install llama-models at that same version first

* fix PIP_ARGS

---------

Co-authored-by: Hardik Shah <hjshah@fb.com>
Co-authored-by: Hardik Shah <hjshah@meta.com>
This commit is contained in:
Ashwin Bharambe 2024-08-08 13:38:41 -07:00 committed by GitHub
parent da4645a27a
commit e830814399
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
115 changed files with 5839 additions and 1120 deletions

View file

@ -9,26 +9,14 @@ import asyncio
import os
import shutil
import time
from functools import partial
from pathlib import Path
import httpx
from huggingface_hub import snapshot_download
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
from llama_models.datatypes import Model
from llama_models.sku_list import (
all_registered_models,
llama_meta_net_info,
resolve_model,
)
from termcolor import cprint
from llama_toolchain.cli.subcommand import Subcommand
from llama_toolchain.utils import DEFAULT_DUMP_DIR
DEFAULT_CHECKPOINT_DIR = os.path.join(DEFAULT_DUMP_DIR, "checkpoints")
class Download(Subcommand):
@ -42,107 +30,130 @@ class Download(Subcommand):
description="Download a model from llama.meta.comf or HuggingFace hub",
formatter_class=argparse.RawTextHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._run_download_cmd)
setup_download_parser(self.parser)
def _add_arguments(self):
models = all_registered_models()
self.parser.add_argument(
"--source",
choices=["meta", "huggingface"],
required=True,
)
self.parser.add_argument(
"--model-id",
choices=[x.descriptor() for x in models],
required=True,
)
self.parser.add_argument(
"--hf-token",
type=str,
required=False,
default=None,
help="Hugging Face API token. Needed for gated models like llama2/3. Will also try to read environment variable `HF_TOKEN` as default.",
)
self.parser.add_argument(
"--meta-url",
type=str,
required=False,
help="For source=meta, URL obtained from llama.meta.com after accepting license terms",
)
self.parser.add_argument(
"--ignore-patterns",
type=str,
required=False,
default="*.safetensors",
help="""
def setup_download_parser(parser: argparse.ArgumentParser) -> None:
from llama_models.sku_list import all_registered_models
models = all_registered_models()
parser.add_argument(
"--source",
choices=["meta", "huggingface"],
required=True,
)
parser.add_argument(
"--model-id",
choices=[x.descriptor() for x in models],
required=True,
)
parser.add_argument(
"--hf-token",
type=str,
required=False,
default=None,
help="Hugging Face API token. Needed for gated models like llama2/3. Will also try to read environment variable `HF_TOKEN` as default.",
)
parser.add_argument(
"--meta-url",
type=str,
required=False,
help="For source=meta, URL obtained from llama.meta.com after accepting license terms",
)
parser.add_argument(
"--ignore-patterns",
type=str,
required=False,
default="*.safetensors",
help="""
For source=huggingface, files matching any of the patterns are not downloaded. Defaults to ignoring
safetensors files to avoid downloading duplicate weights.
""",
)
parser.set_defaults(func=partial(run_download_cmd, parser=parser))
def _hf_download(
model: "Model",
hf_token: str,
ignore_patterns: str,
parser: argparse.ArgumentParser,
):
from huggingface_hub import snapshot_download
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
from llama_toolchain.common.model_utils import model_local_dir
repo_id = model.huggingface_repo
if repo_id is None:
raise ValueError(f"No repo id found for model {model.descriptor()}")
output_dir = model_local_dir(model)
os.makedirs(output_dir, exist_ok=True)
try:
true_output_dir = snapshot_download(
repo_id,
local_dir=output_dir,
ignore_patterns=ignore_patterns,
token=hf_token,
library_name="llama-toolchain",
)
except GatedRepoError:
parser.error(
"It looks like you are trying to access a gated repository. Please ensure you "
"have access to the repository and have provided the proper Hugging Face API token "
"using the option `--hf-token` or by running `huggingface-cli login`."
"You can find your token by visiting https://huggingface.co/settings/tokens"
)
except RepositoryNotFoundError:
parser.error(f"Repository '{args.repo_id}' not found on the Hugging Face Hub.")
except Exception as e:
parser.error(e)
def _hf_download(self, model: Model, hf_token: str, ignore_patterns: str):
repo_id = model.huggingface_repo
if repo_id is None:
raise ValueError(f"No repo id found for model {model.descriptor()}")
print(f"\nSuccessfully downloaded model to {true_output_dir}")
output_dir = Path(DEFAULT_CHECKPOINT_DIR) / model.descriptor()
os.makedirs(output_dir, exist_ok=True)
try:
true_output_dir = snapshot_download(
repo_id,
local_dir=output_dir,
ignore_patterns=ignore_patterns,
token=hf_token,
library_name="llama-toolchain",
def _meta_download(model: "Model", meta_url: str):
from llama_models.sku_list import llama_meta_net_info
from llama_toolchain.common.model_utils import model_local_dir
output_dir = Path(model_local_dir(model))
os.makedirs(output_dir, exist_ok=True)
info = llama_meta_net_info(model)
# I believe we can use some concurrency here if needed but not sure it is worth it
for f in info.files:
output_file = str(output_dir / f)
url = meta_url.replace("*", f"{info.folder}/{f}")
total_size = info.pth_size if "consolidated" in f else 0
cprint(f"Downloading `{f}`...", "white")
downloader = ResumableDownloader(url, output_file, total_size)
asyncio.run(downloader.download())
print(f"\nSuccessfully downloaded model to {output_dir}")
cprint(f"\nMD5 Checksums are at: {output_dir / 'checklist.chk'}", "white")
def run_download_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser):
from llama_models.sku_list import resolve_model
model = resolve_model(args.model_id)
if model is None:
parser.error(f"Model {args.model_id} not found")
return
if args.source == "huggingface":
_hf_download(model, args.hf_token, args.ignore_patterns, parser)
else:
meta_url = args.meta_url
if not meta_url:
meta_url = input(
"Please provide the signed URL you received via email (e.g., https://llama3-1.llamameta.net/*?Policy...): "
)
except GatedRepoError:
self.parser.error(
"It looks like you are trying to access a gated repository. Please ensure you "
"have access to the repository and have provided the proper Hugging Face API token "
"using the option `--hf-token` or by running `huggingface-cli login`."
"You can find your token by visiting https://huggingface.co/settings/tokens"
)
except RepositoryNotFoundError:
self.parser.error(
f"Repository '{args.repo_id}' not found on the Hugging Face Hub."
)
except Exception as e:
self.parser.error(e)
print(f"Successfully downloaded model to {true_output_dir}")
def _meta_download(self, model: Model, meta_url: str):
output_dir = Path(DEFAULT_CHECKPOINT_DIR) / model.descriptor()
os.makedirs(output_dir, exist_ok=True)
info = llama_meta_net_info(model)
# I believe we can use some concurrency here if needed but not sure it is worth it
for f in info.files:
output_file = str(output_dir / f)
url = meta_url.replace("*", f"{info.folder}/{f}")
total_size = info.pth_size if "consolidated" in f else 0
cprint(f"Downloading `{f}`...", "white")
downloader = ResumableDownloader(url, output_file, total_size)
asyncio.run(downloader.download())
def _run_download_cmd(self, args: argparse.Namespace):
model = resolve_model(args.model_id)
if model is None:
self.parser.error(f"Model {args.model_id} not found")
return
if args.source == "huggingface":
self._hf_download(model, args.hf_token, args.ignore_patterns)
else:
meta_url = args.meta_url
if not meta_url:
meta_url = input(
"Please provide the signed URL you received via email (e.g., https://llama3-1.llamameta.net/*?Policy...): "
)
assert meta_url is not None and "llama3-1.llamameta.net" in meta_url
self._meta_download(model, meta_url)
assert meta_url is not None and "llama3-1.llamameta.net" in meta_url
_meta_download(model, meta_url)
class ResumableDownloader: