Introduce Llama stack distributions (#22)

* Add distribution CLI scaffolding

* More progress towards `llama distribution install`

* getting closer to a distro definition, distro install + configure works

* Distribution server now functioning

* read existing configuration, save enums properly

* Remove inference uvicorn server entrypoint and llama inference CLI command

* updated dependency and client model name

* Improved exception handling

* local imports for faster cli

* undo a typo, add a passthrough distribution

* implement full-passthrough in the server

* add safety adapters, configuration handling, server + clients

* cleanup, moving stuff to common, nuke utils

* Add a Path() wrapper at the earliest place

* fixes

* Bring agentic system api to toolchain

Add adapter dependencies and resolve adapters using a topological sort

* refactor to reduce size of `agentic_system`

* move straggler files and fix some important existing bugs

* ApiSurface -> Api

* refactor a method out

* Adapter -> Provider

* Make each inference provider into its own subdirectory

* installation fixes

* Rename Distribution -> DistributionSpec, simplify RemoteProviders

* dict key instead of attr

* update inference config to take model and not model_dir

* Fix passthrough streaming; send headers properly, not as part of the body :facepalm

* update safety to use model sku ids and not model dirs

* Update cli_reference.md

* minor fixes

* add DistributionConfig, fix a bug in model download

* Make install + start scripts do proper configuration automatically

* Update CLI_reference

* Nuke fp8_requirements, fold fbgemm into common requirements

* Update README, add newline between API surface configurations

* Refactor download functionality out of the Command so can be reused

* Add `llama model download` alias for `llama download`

* Show message about checksum file so users can check themselves

* Simpler intro statements

* get ollama working

* Reduce a bunch of dependencies from toolchain

Some improvements to the distribution install script

* Avoid using `conda run` since it buffers everything

* update dependencies and rely on LLAMA_TOOLCHAIN_DIR for dev purposes

* add validation for configuration input

* resort imports

* make optional subclasses default to yes for configuration

* Remove additional_pip_packages; move deps to providers

* Make the 8B model the default for inline inference

* Add scripts to MANIFEST

* allow installing from test.pypi.org

* Fix #2 to help with testing packages

* Must install llama-models at that same version first

* fix PIP_ARGS

---------

Co-authored-by: Hardik Shah <hjshah@fb.com>
Co-authored-by: Hardik Shah <hjshah@meta.com>
Author: Ashwin Bharambe
Date: 2024-08-08 13:38:41 -07:00 (committed via GitHub)
parent da4645a27a
commit e830814399
115 changed files with 5839 additions and 1120 deletions


@@ -3,3 +3,5 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .distribution import DistributionParser # noqa


@@ -0,0 +1,106 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
import json
import shlex
import yaml
from llama_toolchain.cli.subcommand import Subcommand
from llama_toolchain.common.config_dirs import DISTRIBS_BASE_DIR
from termcolor import cprint
class DistributionConfigure(Subcommand):
"""Llama cli for configuring llama toolchain configs"""
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"configure",
prog="llama distribution configure",
description="configure a llama stack distribution",
formatter_class=argparse.RawTextHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._run_distribution_configure_cmd)
def _add_arguments(self):
self.parser.add_argument(
"--name",
type=str,
help="Name of the distribution to configure",
required=True,
)
def _run_distribution_configure_cmd(self, args: argparse.Namespace) -> None:
from llama_toolchain.distribution.datatypes import DistributionConfig
from llama_toolchain.distribution.registry import resolve_distribution_spec
config_file = DISTRIBS_BASE_DIR / args.name / "config.yaml"
if not config_file.exists():
self.parser.error(
f"Could not find {config_file}. Please run `llama distribution install` first"
)
return
# we need to find the spec from the name
with open(config_file, "r") as f:
config = DistributionConfig(**yaml.safe_load(f))
dist = resolve_distribution_spec(config.spec)
if dist is None:
raise ValueError(f"Could not find any registered spec `{config.spec}`")
configure_llama_distribution(dist, config)
def configure_llama_distribution(dist: "Distribution", config: "DistributionConfig"):
from llama_toolchain.common.exec import run_command
from llama_toolchain.common.prompt_for_config import prompt_for_config
from llama_toolchain.common.serialize import EnumEncoder
from llama_toolchain.distribution.dynamic import instantiate_class_type
python_exe = run_command(shlex.split("which python"))
# simple check
conda_env = config.conda_env
if conda_env not in python_exe:
raise ValueError(
f"Please re-run configure by activating the `{conda_env}` conda environment"
)
if config.providers:
cprint(
f"Configuration already exists for {config.name}. Will overwrite...",
"yellow",
attrs=["bold"],
)
for api, provider_spec in dist.provider_specs.items():
cprint(f"Configuring API surface: {api.value}", "white", attrs=["bold"])
config_type = instantiate_class_type(provider_spec.config_class)
provider_config = prompt_for_config(
config_type,
(
config_type(**config.providers[api.value])
if api.value in config.providers
else None
),
)
print("")
config.providers[api.value] = {
"provider_id": provider_spec.provider_id,
**provider_config.dict(),
}
config_path = DISTRIBS_BASE_DIR / config.name / "config.yaml"
with open(config_path, "w") as fp:
dist_config = json.loads(json.dumps(config.dict(), cls=EnumEncoder))
fp.write(yaml.dump(dist_config, sort_keys=False))
print(f"YAML configuration has been written to {config_path}")


@@ -0,0 +1,44 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
from llama_toolchain.cli.subcommand import Subcommand
class DistributionCreate(Subcommand):
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"create",
prog="llama distribution create",
description="create a Llama stack distribution",
formatter_class=argparse.RawTextHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._run_distribution_create_cmd)
def _add_arguments(self):
self.parser.add_argument(
"--name",
type=str,
help="Name of the distribution to create",
required=True,
)
# for each Api the user wants to support, we should
# get the list of available providers, ask which one the user
# wants to pick and then ask for their configuration.
def _run_distribution_create_cmd(self, args: argparse.Namespace) -> None:
from llama_toolchain.distribution.registry import resolve_distribution_spec
dist = resolve_distribution_spec(args.name)
if dist is not None:
self.parser.error(f"Distribution with name {args.name} already exists")
return
raise NotImplementedError()


@@ -0,0 +1,35 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
from llama_toolchain.cli.subcommand import Subcommand
from .configure import DistributionConfigure
from .create import DistributionCreate
from .install import DistributionInstall
from .list import DistributionList
from .start import DistributionStart
class DistributionParser(Subcommand):
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"distribution",
prog="llama distribution",
description="Operate on llama stack distributions",
)
subparsers = self.parser.add_subparsers(title="distribution_subcommands")
# Add sub-commands
DistributionList.create(subparsers)
DistributionInstall.create(subparsers)
DistributionCreate.create(subparsers)
DistributionConfigure.create(subparsers)
DistributionStart.create(subparsers)
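
For orientation, a stand-alone sketch of the subcommand pattern these classes follow: each subcommand registers its own argparse parser and binds a handler via set_defaults(func=...), and the top-level parser dispatches to it. The Subcommand base class itself is not part of this diff, so its create() helper is assumed here to simply instantiate the class.

import argparse


class HelloCommand:
    # stand-in for llama_toolchain.cli.subcommand.Subcommand, which is not shown in this diff
    def __init__(self, subparsers: argparse._SubParsersAction):
        self.parser = subparsers.add_parser("hello", description="Say hello")
        self.parser.add_argument("--name", required=True)
        self.parser.set_defaults(func=self._run)

    @classmethod
    def create(cls, subparsers):
        return cls(subparsers)

    def _run(self, args: argparse.Namespace) -> None:
        print(f"hello, {args.name}")


parser = argparse.ArgumentParser(prog="llama")
subparsers = parser.add_subparsers(title="subcommands")
HelloCommand.create(subparsers)

args = parser.parse_args(["hello", "--name", "world"])
args.func(args)  # the parsed namespace carries the handler chosen by the subcommand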


@@ -0,0 +1,111 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
import os
import pkg_resources
import yaml
from termcolor import cprint
from llama_toolchain.cli.subcommand import Subcommand
from llama_toolchain.common.config_dirs import DISTRIBS_BASE_DIR
class DistributionInstall(Subcommand):
"""Llama cli for configuring llama toolchain configs"""
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"install",
prog="llama distribution install",
description="Install a llama stack distribution",
formatter_class=argparse.RawTextHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._run_distribution_install_cmd)
def _add_arguments(self):
from llama_toolchain.distribution.registry import available_distribution_specs
self.parser.add_argument(
"--spec",
type=str,
help="Distribution spec to install (try ollama-inline)",
required=True,
choices=[d.spec_id for d in available_distribution_specs()],
)
self.parser.add_argument(
"--name",
type=str,
help="What should the installation be called locally?",
required=True,
)
self.parser.add_argument(
"--conda-env",
type=str,
help="conda env in which this distribution will run (default = distribution name)",
)
def _run_distribution_install_cmd(self, args: argparse.Namespace) -> None:
from llama_toolchain.common.exec import run_with_pty
from llama_toolchain.distribution.datatypes import DistributionConfig
from llama_toolchain.distribution.distribution import distribution_dependencies
from llama_toolchain.distribution.registry import resolve_distribution_spec
os.makedirs(DISTRIBS_BASE_DIR, exist_ok=True)
script = pkg_resources.resource_filename(
"llama_toolchain",
"distribution/install_distribution.sh",
)
dist = resolve_distribution_spec(args.spec)
if dist is None:
self.parser.error(f"Could not find distribution {args.spec}")
return
distrib_dir = DISTRIBS_BASE_DIR / args.name
os.makedirs(distrib_dir, exist_ok=True)
deps = distribution_dependencies(dist)
if not args.conda_env:
print(f"Using {args.name} as the Conda environment for this distribution")
conda_env = args.conda_env or args.name
config_file = distrib_dir / "config.yaml"
if config_file.exists():
c = DistributionConfig(**yaml.safe_load(config_file.read_text()))
if c.spec != dist.spec_id:
self.parser.error(
f"already installed distribution with `spec={c.spec}` does not match provided spec `{args.spec}`"
)
return
if c.conda_env != conda_env:
self.parser.error(
f"already installed distribution has `conda_env={c.conda_env}` different from provided conda env `{conda_env}`"
)
return
else:
with open(config_file, "w") as f:
c = DistributionConfig(
spec=dist.spec_id,
name=args.name,
conda_env=conda_env,
)
f.write(yaml.dump(c.dict(), sort_keys=False))
return_code = run_with_pty([script, conda_env, args.name, " ".join(deps)])
assert return_code == 0, cprint(
f"Failed to install distribution {dist.spec_id}", color="red"
)
cprint(
f"Distribution `{args.name}` (with spec {dist.spec_id}) has been installed successfully!",
color="green",
)
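
The commit message mentions resolving adapters/providers with a topological sort; `distribution_dependencies(dist)` above is where that resolution feeds into the install. A minimal sketch of the idea using only the standard library (the dependency graph below is made up, not the real provider graph):

from graphlib import TopologicalSorter  # stdlib, Python 3.9+

# provider -> set of providers it depends on (illustrative only)
deps = {
    "agentic_system": {"inference", "safety"},
    "safety": {"inference"},
    "inference": set(),
}

order = list(TopologicalSorter(deps).static_order())
print(order)  # dependencies first, e.g. ['inference', 'safety', 'agentic_system']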


@@ -0,0 +1,54 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
import json
from llama_toolchain.cli.subcommand import Subcommand
class DistributionList(Subcommand):
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"list",
prog="llama distribution list",
description="Show available llama stack distributions",
formatter_class=argparse.RawTextHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._run_distribution_list_cmd)
def _add_arguments(self):
pass
def _run_distribution_list_cmd(self, args: argparse.Namespace) -> None:
from llama_toolchain.cli.table import print_table
from llama_toolchain.distribution.registry import available_distribution_specs
# eventually, this should query a registry at llama.meta.com/llamastack/distributions
headers = [
"Spec ID",
"ProviderSpecs",
"Description",
]
rows = []
for spec in available_distribution_specs():
providers = {k.value: v.provider_id for k, v in spec.provider_specs.items()}
rows.append(
[
spec.spec_id,
json.dumps(providers, indent=2),
spec.description,
]
)
print_table(
rows,
headers,
separate_rows=True,
)


@@ -0,0 +1,82 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
import pkg_resources
import yaml
from llama_toolchain.cli.subcommand import Subcommand
from llama_toolchain.common.config_dirs import DISTRIBS_BASE_DIR
class DistributionStart(Subcommand):
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"start",
prog="llama distribution start",
description="""start the server for a Llama stack distribution. you should have already installed and configured the distribution""",
formatter_class=argparse.RawTextHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._run_distribution_start_cmd)
def _add_arguments(self):
self.parser.add_argument(
"--name",
type=str,
help="Name of the distribution to start",
required=True,
)
self.parser.add_argument(
"--port",
type=int,
help="Port to run the server on. Defaults to 5000",
default=5000,
)
self.parser.add_argument(
"--disable-ipv6",
action="store_true",
help="Disable IPv6 support",
default=False,
)
def _run_distribution_start_cmd(self, args: argparse.Namespace) -> None:
from llama_toolchain.common.exec import run_with_pty
from llama_toolchain.distribution.registry import resolve_distribution_spec
config_file = DISTRIBS_BASE_DIR / args.name / "config.yaml"
if not config_file.exists():
self.parser.error(
f"Could not find {config_file}. Please run `llama distribution install` first"
)
return
# we need to find the spec from the name
with open(config_file, "r") as f:
config = yaml.safe_load(f)
dist = resolve_distribution_spec(config["spec"])
if dist is None:
raise ValueError(f"Could not find any registered spec `{config['spec']}`")
conda_env = config["conda_env"]
if not conda_env:
raise ValueError(
f"Could not find Conda environment for distribution `{args.name}`"
)
script = pkg_resources.resource_filename(
"llama_toolchain",
"distribution/start_distribution.sh",
)
args = [script, conda_env, config_file, "--port", str(args.port)] + (
["--disable-ipv6"] if args.disable_ipv6 else []
)
run_with_pty(args)
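
Both `configure` and `start` read the same per-distribution config.yaml that `install` writes above. A hypothetical example of what such a file holds, expressed as the dict that `yaml.safe_load` returns (the field names come from the code; the concrete values, the provider-specific fields, and the exact location under DISTRIBS_BASE_DIR are made up):

import yaml

example = """\
name: local-ollama
spec: ollama-inline
conda_env: local-ollama
providers:
  inference:
    provider_id: meta-ollama
    url: http://localhost:11434
"""

config = yaml.safe_load(example)
print(config["spec"], config["conda_env"])  # the two fields `llama distribution start` checks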


@@ -9,26 +9,14 @@ import asyncio
import os
import shutil
import time
from functools import partial
from pathlib import Path
import httpx
from huggingface_hub import snapshot_download
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
from llama_models.datatypes import Model
from llama_models.sku_list import (
all_registered_models,
llama_meta_net_info,
resolve_model,
)
from termcolor import cprint
from llama_toolchain.cli.subcommand import Subcommand
from llama_toolchain.utils import DEFAULT_DUMP_DIR
DEFAULT_CHECKPOINT_DIR = os.path.join(DEFAULT_DUMP_DIR, "checkpoints")
class Download(Subcommand):
@@ -42,107 +30,130 @@ class Download(Subcommand):
description="Download a model from llama.meta.comf or HuggingFace hub",
formatter_class=argparse.RawTextHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._run_download_cmd)
setup_download_parser(self.parser)
def _add_arguments(self):
models = all_registered_models()
self.parser.add_argument(
"--source",
choices=["meta", "huggingface"],
required=True,
)
self.parser.add_argument(
"--model-id",
choices=[x.descriptor() for x in models],
required=True,
)
self.parser.add_argument(
"--hf-token",
type=str,
required=False,
default=None,
help="Hugging Face API token. Needed for gated models like llama2/3. Will also try to read environment variable `HF_TOKEN` as default.",
)
self.parser.add_argument(
"--meta-url",
type=str,
required=False,
help="For source=meta, URL obtained from llama.meta.com after accepting license terms",
)
self.parser.add_argument(
"--ignore-patterns",
type=str,
required=False,
default="*.safetensors",
help="""
def setup_download_parser(parser: argparse.ArgumentParser) -> None:
from llama_models.sku_list import all_registered_models
models = all_registered_models()
parser.add_argument(
"--source",
choices=["meta", "huggingface"],
required=True,
)
parser.add_argument(
"--model-id",
choices=[x.descriptor() for x in models],
required=True,
)
parser.add_argument(
"--hf-token",
type=str,
required=False,
default=None,
help="Hugging Face API token. Needed for gated models like llama2/3. Will also try to read environment variable `HF_TOKEN` as default.",
)
parser.add_argument(
"--meta-url",
type=str,
required=False,
help="For source=meta, URL obtained from llama.meta.com after accepting license terms",
)
parser.add_argument(
"--ignore-patterns",
type=str,
required=False,
default="*.safetensors",
help="""
For source=huggingface, files matching any of the patterns are not downloaded. Defaults to ignoring
safetensors files to avoid downloading duplicate weights.
""",
)
parser.set_defaults(func=partial(run_download_cmd, parser=parser))
def _hf_download(
model: "Model",
hf_token: str,
ignore_patterns: str,
parser: argparse.ArgumentParser,
):
from huggingface_hub import snapshot_download
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
from llama_toolchain.common.model_utils import model_local_dir
repo_id = model.huggingface_repo
if repo_id is None:
raise ValueError(f"No repo id found for model {model.descriptor()}")
output_dir = model_local_dir(model)
os.makedirs(output_dir, exist_ok=True)
try:
true_output_dir = snapshot_download(
repo_id,
local_dir=output_dir,
ignore_patterns=ignore_patterns,
token=hf_token,
library_name="llama-toolchain",
)
except GatedRepoError:
parser.error(
"It looks like you are trying to access a gated repository. Please ensure you "
"have access to the repository and have provided the proper Hugging Face API token "
"using the option `--hf-token` or by running `huggingface-cli login`."
"You can find your token by visiting https://huggingface.co/settings/tokens"
)
except RepositoryNotFoundError:
parser.error(f"Repository '{args.repo_id}' not found on the Hugging Face Hub.")
except Exception as e:
parser.error(e)
def _hf_download(self, model: Model, hf_token: str, ignore_patterns: str):
repo_id = model.huggingface_repo
if repo_id is None:
raise ValueError(f"No repo id found for model {model.descriptor()}")
print(f"\nSuccessfully downloaded model to {true_output_dir}")
output_dir = Path(DEFAULT_CHECKPOINT_DIR) / model.descriptor()
os.makedirs(output_dir, exist_ok=True)
try:
true_output_dir = snapshot_download(
repo_id,
local_dir=output_dir,
ignore_patterns=ignore_patterns,
token=hf_token,
library_name="llama-toolchain",
def _meta_download(model: "Model", meta_url: str):
from llama_models.sku_list import llama_meta_net_info
from llama_toolchain.common.model_utils import model_local_dir
output_dir = Path(model_local_dir(model))
os.makedirs(output_dir, exist_ok=True)
info = llama_meta_net_info(model)
# I believe we can use some concurrency here if needed but not sure it is worth it
for f in info.files:
output_file = str(output_dir / f)
url = meta_url.replace("*", f"{info.folder}/{f}")
total_size = info.pth_size if "consolidated" in f else 0
cprint(f"Downloading `{f}`...", "white")
downloader = ResumableDownloader(url, output_file, total_size)
asyncio.run(downloader.download())
print(f"\nSuccessfully downloaded model to {output_dir}")
cprint(f"\nMD5 Checksums are at: {output_dir / 'checklist.chk'}", "white")
def run_download_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser):
from llama_models.sku_list import resolve_model
model = resolve_model(args.model_id)
if model is None:
parser.error(f"Model {args.model_id} not found")
return
if args.source == "huggingface":
_hf_download(model, args.hf_token, args.ignore_patterns, parser)
else:
meta_url = args.meta_url
if not meta_url:
meta_url = input(
"Please provide the signed URL you received via email (e.g., https://llama3-1.llamameta.net/*?Policy...): "
)
except GatedRepoError:
self.parser.error(
"It looks like you are trying to access a gated repository. Please ensure you "
"have access to the repository and have provided the proper Hugging Face API token "
"using the option `--hf-token` or by running `huggingface-cli login`."
"You can find your token by visiting https://huggingface.co/settings/tokens"
)
except RepositoryNotFoundError:
self.parser.error(
f"Repository '{args.repo_id}' not found on the Hugging Face Hub."
)
except Exception as e:
self.parser.error(e)
print(f"Successfully downloaded model to {true_output_dir}")
def _meta_download(self, model: Model, meta_url: str):
output_dir = Path(DEFAULT_CHECKPOINT_DIR) / model.descriptor()
os.makedirs(output_dir, exist_ok=True)
info = llama_meta_net_info(model)
# I believe we can use some concurrency here if needed but not sure it is worth it
for f in info.files:
output_file = str(output_dir / f)
url = meta_url.replace("*", f"{info.folder}/{f}")
total_size = info.pth_size if "consolidated" in f else 0
cprint(f"Downloading `{f}`...", "white")
downloader = ResumableDownloader(url, output_file, total_size)
asyncio.run(downloader.download())
def _run_download_cmd(self, args: argparse.Namespace):
model = resolve_model(args.model_id)
if model is None:
self.parser.error(f"Model {args.model_id} not found")
return
if args.source == "huggingface":
self._hf_download(model, args.hf_token, args.ignore_patterns)
else:
meta_url = args.meta_url
if not meta_url:
meta_url = input(
"Please provide the signed URL you received via email (e.g., https://llama3-1.llamameta.net/*?Policy...): "
)
assert meta_url is not None and "llama3-1.llamameta.net" in meta_url
self._meta_download(model, meta_url)
assert meta_url is not None and "llama3-1.llamameta.net" in meta_url
_meta_download(model, meta_url)
class ResumableDownloader:
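
One detail worth calling out from the meta download path: the signed URL from llama.meta.com contains a `*` placeholder, and `_meta_download` expands it once per file. A small illustration (folder and file names are made up; the real ones come from `llama_meta_net_info(model)`):

meta_url = "https://llama3-1.llamameta.net/*?Policy=abc&Signature=xyz"  # placeholder URL for illustration
folder = "Meta-Llama-3.1-8B-Instruct"

for name in ["consolidated.00.pth", "params.json", "checklist.chk"]:
    url = meta_url.replace("*", f"{folder}/{name}")
    print(url)
# e.g. https://llama3-1.llamameta.net/Meta-Llama-3.1-8B-Instruct/params.json?Policy=abc&Signature=xyz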


@@ -1,91 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
import os
import textwrap
from pathlib import Path
import pkg_resources
from llama_toolchain.cli.subcommand import Subcommand
from llama_toolchain.utils import DEFAULT_DUMP_DIR
CONFIGS_BASE_DIR = os.path.join(DEFAULT_DUMP_DIR, "configs")
class InferenceConfigure(Subcommand):
"""Llama cli for configuring llama toolchain configs"""
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"configure",
prog="llama inference configure",
description="Configure llama toolchain inference configs",
epilog=textwrap.dedent(
"""
Example:
llama inference configure
"""
),
formatter_class=argparse.RawTextHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._run_inference_configure_cmd)
def _add_arguments(self):
pass
def read_user_inputs(self):
checkpoint_dir = input(
"Enter the checkpoint directory for the model (e.g., ~/.llama/checkpoints/Meta-Llama-3-8B/): "
)
model_parallel_size = input(
"Enter model parallel size (e.g., 1 for 8B / 8 for 70B and 405B): "
)
assert model_parallel_size.isdigit() and int(model_parallel_size) in {
1,
8,
}, "model parallel size must be 1 or 8"
return checkpoint_dir, model_parallel_size
def write_output_yaml(self, checkpoint_dir, model_parallel_size, yaml_output_path):
default_conf_path = pkg_resources.resource_filename(
"llama_toolchain", "data/default_inference_config.yaml"
)
with open(default_conf_path, "r") as f:
yaml_content = f.read()
yaml_content = yaml_content.format(
checkpoint_dir=checkpoint_dir,
model_parallel_size=model_parallel_size,
)
with open(yaml_output_path, "w") as yaml_file:
yaml_file.write(yaml_content.strip())
print(f"YAML configuration has been written to {yaml_output_path}")
def _run_inference_configure_cmd(self, args: argparse.Namespace) -> None:
checkpoint_dir, model_parallel_size = self.read_user_inputs()
checkpoint_dir = os.path.expanduser(checkpoint_dir)
assert (
Path(checkpoint_dir).exists() and Path(checkpoint_dir).is_dir()
), f"{checkpoint_dir} does not exist or it not a directory"
os.makedirs(CONFIGS_BASE_DIR, exist_ok=True)
yaml_output_path = Path(CONFIGS_BASE_DIR) / "inference.yaml"
self.write_output_yaml(
checkpoint_dir,
model_parallel_size,
yaml_output_path,
)


@@ -1,36 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
import textwrap
from llama_toolchain.cli.inference.configure import InferenceConfigure
from llama_toolchain.cli.inference.start import InferenceStart
from llama_toolchain.cli.subcommand import Subcommand
class InferenceParser(Subcommand):
"""Llama cli for inference apis"""
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"inference",
prog="llama inference",
description="Run inference on a llama model",
epilog=textwrap.dedent(
"""
Example:
llama inference start <options>
"""
),
)
subparsers = self.parser.add_subparsers(title="inference_subcommands")
# Add sub-commandsa
InferenceStart.create(subparsers)
InferenceConfigure.create(subparsers)


@@ -1,57 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
import textwrap
from llama_toolchain.cli.subcommand import Subcommand
from llama_toolchain.inference.server import main as inference_server_init
class InferenceStart(Subcommand):
"""Llama Inference cli for starting inference server"""
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"start",
prog="llama inference start",
description="Start an inference server",
epilog=textwrap.dedent(
"""
Example:
llama inference start <options>
"""
),
formatter_class=argparse.RawTextHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._run_inference_start_cmd)
def _add_arguments(self):
self.parser.add_argument(
"--port",
type=int,
help="Port to run the server on. Defaults to 5000",
default=5000,
)
self.parser.add_argument(
"--disable-ipv6",
action="store_true",
help="Disable IPv6 support",
default=False,
)
self.parser.add_argument(
"--config", type=str, help="Path to config file", default="inference"
)
def _run_inference_start_cmd(self, args: argparse.Namespace) -> None:
inference_server_init(
config_path=args.config,
port=args.port,
disable_ipv6=args.disable_ipv6,
)


@@ -6,9 +6,9 @@
import argparse
from llama_toolchain.cli.download import Download
from llama_toolchain.cli.inference.inference import InferenceParser
from llama_toolchain.cli.model.model import ModelParser
from .distribution import DistributionParser
from .download import Download
from .model import ModelParser
class LlamaCLIParser:
@@ -28,8 +28,8 @@ class LlamaCLIParser:
# Add sub-commands
Download.create(subparsers)
InferenceParser.create(subparsers)
ModelParser.create(subparsers)
DistributionParser.create(subparsers)
# Import sub-commands from agentic_system if they exist
try:


@@ -3,3 +3,5 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .model import ModelParser # noqa


@@ -7,21 +7,13 @@
import argparse
import json
from enum import Enum
from llama_models.sku_list import resolve_model
from termcolor import colored
from llama_toolchain.cli.subcommand import Subcommand
from llama_toolchain.cli.table import print_table
class EnumEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, Enum):
return obj.value
return super().default(obj)
from llama_toolchain.common.serialize import EnumEncoder
class ModelDescribe(Subcommand):
@@ -57,9 +49,9 @@ class ModelDescribe(Subcommand):
rows = [
(
colored("Model", "white", attrs=["bold"]),
colored(model.sku.value, "white", attrs=["bold"]),
colored(model.descriptor(), "white", attrs=["bold"]),
),
("HuggingFace ID", model.huggingface_id or "<Not Available>"),
("HuggingFace ID", model.huggingface_repo or "<Not Available>"),
("Description", model.description_markdown),
("Context Length", f"{model.max_seq_length // 1024}K tokens"),
("Weights format", model.quantization_format.value),


@@ -0,0 +1,24 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
from llama_toolchain.cli.subcommand import Subcommand
class ModelDownload(Subcommand):
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"download",
prog="llama model download",
description="Download a model from llama.meta.comf or HuggingFace hub",
formatter_class=argparse.RawTextHelpFormatter,
)
from llama_toolchain.cli.download import setup_download_parser
setup_download_parser(self.parser)
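
The alias works because the download arguments and handler were refactored out of the Download command (see the download.py diff above): setup_download_parser() can be pointed at any parser, and functools.partial binds the handler to the parser it should report errors through. A stand-alone sketch of that wiring (argument set trimmed down for brevity):

import argparse
from functools import partial


def run_download_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
    print(f"would download {args.model_id} (invoked as `{parser.prog}`)")


def setup_download_parser(parser: argparse.ArgumentParser) -> None:
    parser.add_argument("--model-id", required=True)
    parser.set_defaults(func=partial(run_download_cmd, parser=parser))


root = argparse.ArgumentParser(prog="llama")
sub = root.add_subparsers()
setup_download_parser(sub.add_parser("download"))         # `llama download`
model_sub = sub.add_parser("model").add_subparsers()
setup_download_parser(model_sub.add_parser("download"))   # `llama model download`

args = root.parse_args(["model", "download", "--model-id", "Meta-Llama3.1-8B"])
args.func(args)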


@@ -5,9 +5,9 @@
# the root directory of this source tree.
import argparse
import textwrap
from llama_toolchain.cli.model.describe import ModelDescribe
from llama_toolchain.cli.model.download import ModelDownload
from llama_toolchain.cli.model.list import ModelList
from llama_toolchain.cli.model.template import ModelTemplate
@@ -22,18 +22,13 @@ class ModelParser(Subcommand):
self.parser = subparsers.add_parser(
"model",
prog="llama model",
description="Describe llama model interfaces",
epilog=textwrap.dedent(
"""
Example:
llama model <subcommand> <options>
"""
),
description="Work with llama models",
)
subparsers = self.parser.add_subparsers(title="model_subcommands")
# Add sub-commandsa
ModelTemplate.create(subparsers)
# Add sub-commands
ModelDownload.create(subparsers)
ModelList.create(subparsers)
ModelTemplate.create(subparsers)
ModelDescribe.create(subparsers)


@@ -7,14 +7,9 @@
import argparse
import textwrap
from llama_models.llama3_1.api.interface import (
list_jinja_templates,
render_jinja_template,
)
from termcolor import colored
from llama_toolchain.cli.subcommand import Subcommand
from llama_toolchain.cli.table import print_table
class ModelTemplate(Subcommand):
@@ -53,6 +48,12 @@ class ModelTemplate(Subcommand):
)
def _run_model_template_cmd(self, args: argparse.Namespace) -> None:
from llama_models.llama3_1.api.interface import (
list_jinja_templates,
render_jinja_template,
)
from llama_toolchain.cli.table import print_table
if args.name:
template, tokens_info = render_jinja_template(args.name)
rendered = ""


@@ -45,7 +45,7 @@ def format_row(row, col_widths):
def print_table(rows, headers=None, separate_rows: bool = False):
def itemlen(item):
return len(strip_ansi_colors(item))
return max([len(line) for line in strip_ansi_colors(item).split("\n")])
rows = [[x or "" for x in row] for row in rows]
if not headers:
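
The one-line change above exists because table cells can now span multiple lines (for example, the pretty-printed provider JSON in `llama distribution list`), so a column's width has to follow the widest line rather than the raw string length. A quick illustration (ANSI-color stripping omitted):

def itemlen(item: str) -> int:
    return max(len(line) for line in item.split("\n"))


cell = '{\n  "inference": "meta-ollama"\n}'
print(len(cell))      # counts the newlines and every line together
print(itemlen(cell))  # width of the widest single line, which is what column sizing needs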