Use new definitions of Model / SKU

Ashwin Bharambe 2024-07-31 11:36:16 -07:00
parent 156bfa0e15
commit 09cf3fe78b
8 changed files with 63 additions and 65 deletions


@@ -9,7 +9,7 @@ import json
 from enum import Enum
-from llama_models.sku_list import llama3_1_model_list
+from llama_models.sku_list import resolve_model
 from termcolor import colored
@@ -47,20 +47,13 @@ class ModelDescribe(Subcommand):
         )

     def _run_model_describe_cmd(self, args: argparse.Namespace) -> None:
-        models = llama3_1_model_list()
-        by_id = {model.sku.value: model for model in models}
-        if args.model_id not in by_id:
-            print(
+        model = resolve_model(args.model_id)
+        if model is None:
+            self.parser.error(
                 f"Model {args.model_id} not found; try 'llama model list' for a list of available models."
             )
-            return
-
-        model = by_id[args.model_id]
-        sampling_params = model.recommended_sampling_params.dict()
-        for k in ("max_tokens", "repetition_penalty"):
-            del sampling_params[k]

         rows = [
             (
                 colored("Model", "white", attrs=["bold"]),
@@ -70,13 +63,20 @@ class ModelDescribe(Subcommand):
             ("Description", model.description_markdown),
             ("Context Length", f"{model.max_seq_length // 1024}K tokens"),
             ("Weights format", model.quantization_format.value),
-            (
-                "Recommended sampling params",
-                json.dumps(sampling_params, cls=EnumEncoder, indent=4),
-            ),
             ("Model params.json", json.dumps(model.model_args, indent=4)),
         ]

+        if model.recommended_sampling_params is not None:
+            sampling_params = model.recommended_sampling_params.dict()
+            for k in ("max_tokens", "repetition_penalty"):
+                del sampling_params[k]
+            rows.append(
+                (
+                    "Recommended sampling params",
+                    json.dumps(sampling_params, cls=EnumEncoder, indent=4),
+                )
+            )

         print_table(
             rows,
             separate_rows=True,
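The describe command now resolves a single model directly instead of building a SKU-keyed dictionary, and only prints sampling params when the model defines them. A minimal standalone sketch of that lookup pattern, assuming only the names visible in this diff (resolve_model, recommended_sampling_params); the describe() helper itself is hypothetical, not part of the CLI:

# Sketch of the new lookup flow; resolve_model returns None for unknown ids.
from llama_models.sku_list import resolve_model

def describe(model_id: str) -> None:
    model = resolve_model(model_id)
    if model is None:
        raise SystemExit(f"Model {model_id} not found")
    # recommended_sampling_params may be absent for some models, hence the guard.
    params = model.recommended_sampling_params
    if params is not None:
        print(params.dict())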


@@ -6,7 +6,7 @@
 import argparse

-from llama_models.sku_list import llama3_1_model_list
+from llama_models.sku_list import all_registered_models

 from llama_toolchain.cli.subcommand import Subcommand
 from llama_toolchain.cli.table import print_table
@@ -30,21 +30,22 @@ class ModelList(Subcommand):
         pass

     def _run_model_list_cmd(self, args: argparse.Namespace) -> None:
-        models = llama3_1_model_list()
         headers = [
-            "Model ID",
-            "HuggingFace ID",
+            "Model Descriptor",
+            "HuggingFace Repo",
             "Context Length",
             "Hardware Requirements",
         ]

         rows = []
-        for model in models:
+        for model in all_registered_models():
             req = model.hardware_requirements
+            descriptor = model.descriptor()
             rows.append(
                 [
-                    model.sku.value,
-                    model.huggingface_id,
+                    descriptor,
+                    model.huggingface_repo,
                     f"{model.max_seq_length // 1024}K",
                     f"{req.gpu_count} GPU{'s' if req.gpu_count > 1 else ''}, each >= {req.memory_gb_per_gpu}GB VRAM",
                 ]
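The list command now iterates all_registered_models() and identifies each entry by its descriptor and HuggingFace repo rather than by SKU. A minimal sketch of that iteration, assuming only the attribute names used in this diff (descriptor(), huggingface_repo, max_seq_length, hardware_requirements); the loop is illustrative, not the CLI code:

# Illustrative use of the registry iteration shown above.
from llama_models.sku_list import all_registered_models

for model in all_registered_models():
    req = model.hardware_requirements
    print(
        model.descriptor(),
        model.huggingface_repo,
        f"{model.max_seq_length // 1024}K",
        f"{req.gpu_count} GPU(s), each >= {req.memory_gb_per_gpu}GB VRAM",
    )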