Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-27 18:50:41 +00:00)
feat: introduce llama4 support (#1877)
As the title says. Details are in the README and elsewhere.
Parent: 23a99a4b22
Commit: b8f1561956

61 changed files with 205222 additions and 6439 deletions
@@ -19,6 +19,7 @@ from .datatypes import (
     CheckpointQuantizationFormat,
     CoreModelId,
     Model,
+    ModelFamily,
     SamplingParams,
     TopPSamplingStrategy,
 )
@@ -36,7 +37,13 @@ def resolve_model(descriptor: str) -> Optional[Model]:
 
 
 def all_registered_models() -> List[Model]:
     return (
-        llama2_family() + llama3_family() + llama3_1_family() + llama3_2_family() + llama3_3_family() + safety_models()
+        llama2_family()
+        + llama3_family()
+        + llama3_1_family()
+        + llama3_2_family()
+        + llama3_3_family()
+        + llama4_family()
+        + safety_models()
     )
 
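The registry helpers compose, so the new SKUs are visible through the same entry points as earlier families. A minimal usage sketch follows; the module path and the descriptor string are assumptions (neither is confirmed by this diff), while resolve_model() and its signature come from the hunk header above.

    from llama_stack.models.llama.sku_list import (  # module path assumed
        all_registered_models,
        resolve_model,
    )

    # Print every registered checkpoint repo; after this change the list
    # includes the Llama 4 Scout and Maverick SKUs.
    for model in all_registered_models():
        print(model.huggingface_repo)

    # resolve_model(descriptor: str) -> Optional[Model], per the hunk header.
    scout = resolve_model("Llama-4-Scout-17B-16E")  # descriptor value assumed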
@@ -83,6 +90,60 @@ def llama3_3_family() -> List[Model]:
     ]
 
 
+def llama4_family() -> List[Model]:
+    return [
+        *llama4_base_models(),
+        *llama4_instruct_models(),
+    ]
+
+
+def llama4_base_models() -> List[Model]:
+    return [
+        Model(
+            core_model_id=CoreModelId.llama4_scout_17b_16e,
+            description="Llama 4 Scout (17b 16 experts model)",
+            huggingface_repo="meta-llama/Llama-4-Scout-17B-16E",
+            pth_file_count=8,
+            arch_args={},
+        ),
+        Model(
+            core_model_id=CoreModelId.llama4_maverick_17b_128e,
+            description="Llama 4 Maverick (17b 128 experts model)",
+            huggingface_repo="meta-llama/Llama-4-Maverick-17B-128E",
+            pth_file_count=8,
+            arch_args={},
+        ),
+    ]
+
+
+def llama4_instruct_models() -> List[Model]:
+    return [
+        Model(
+            core_model_id=CoreModelId.llama4_scout_17b_16e_instruct,
+            description="Llama 4 Scout (17b 16 experts instruct model)",
+            huggingface_repo="meta-llama/Llama-4-Scout-17B-16E-Instruct",
+            pth_file_count=8,
+            arch_args={},
+        ),
+        Model(
+            core_model_id=CoreModelId.llama4_maverick_17b_128e_instruct,
+            description="Llama 4 Maverick (17b 128 experts instruct model)",
+            huggingface_repo="meta-llama/Llama-4-Maverick-17B-128E-Instruct",
+            pth_file_count=8,
+            arch_args={},
+        ),
+        Model(
+            core_model_id=CoreModelId.llama4_maverick_17b_128e_instruct,
+            description="Llama 4 Maverick (FP8 quantized)",
+            huggingface_repo="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+            quantization_format=CheckpointQuantizationFormat.fp8_mixed,
+            pth_file_count=8,
+            variant="fp8",
+            arch_args={},
+        ),
+    ]
+
+
 def llama2_base_models() -> List[Model]:
     return [
         Model(
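Note that the bf16 and FP8 Maverick instruct entries share a core_model_id; only quantization_format and variant distinguish them. A sketch, assuming only the Model fields visible in this hunk, of how a caller could select the FP8 checkpoint:

    # Both Maverick instruct SKUs share a CoreModelId, so key on
    # quantization_format (or variant == "fp8") to disambiguate them.
    fp8_maverick = next(
        m
        for m in llama4_instruct_models()
        if m.core_model_id == CoreModelId.llama4_maverick_17b_128e_instruct
        and m.quantization_format == CheckpointQuantizationFormat.fp8_mixed
    )
    print(fp8_maverick.huggingface_repo)
    # -> meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8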
@@ -989,12 +1050,24 @@ def llama_meta_pth_size(model: Model) -> int:
     if model.core_model_id not in (
         CoreModelId.llama3_1_405b,
         CoreModelId.llama3_1_405b_instruct,
+        CoreModelId.llama4_maverick_17b_128e,
+        CoreModelId.llama4_maverick_17b_128e_instruct,
     ):
         return 0
 
-    if model.pth_file_count == 16:
-        return 51268302389
-    elif model.quantization_format == CheckpointQuantizationFormat.fp8_mixed:
-        return 60903742309
-    else:
-        return 101470976045
+    if model.model_family == ModelFamily.llama3_1:
+        if model.pth_file_count == 16:
+            return 51268302389
+        elif model.quantization_format == CheckpointQuantizationFormat.fp8_mixed:
+            return 60903742309
+        else:
+            return 101470976045
+
+    if model.model_family == ModelFamily.llama4:
+        if model.core_model_id == CoreModelId.llama4_maverick_17b_128e:
+            return 100458118386
+        elif model.core_model_id == CoreModelId.llama4_maverick_17b_128e_instruct:
+            if model.quantization_format == CheckpointQuantizationFormat.fp8_mixed:
+                return 54121549657
+            else:
+                return 100426653046
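Reading the constants, llama_meta_pth_size() appears to return a per-shard .pth size in bytes, with 0 for models handled by a default path elsewhere. A sketch under that assumption; estimated_checkpoint_bytes is a hypothetical helper, not part of the diff:

    # Hypothetical helper (assumption: the return value is bytes per shard):
    # a whole-checkpoint estimate multiplies by pth_file_count.
    def estimated_checkpoint_bytes(model: Model) -> int:
        per_file = llama_meta_pth_size(model)
        return per_file * model.pth_file_count if per_file else 0

This reading is at least consistent with the numbers above: 8 shards at 100458118386 bytes each is roughly 800 GB, which matches a ~400B-parameter Maverick checkpoint stored at two bytes per parameter.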
|
Loading…
Add table
Add a link
Reference in a new issue