commit 76004eacb4 (parent b239c57c54) in meta-llama/llama-stack:

    rename quant types to use _mixed naming

3 changed files with 11 additions and 11 deletions
```diff
@@ -91,7 +91,7 @@ def convert_to_quantized_model(
         log_status(f"Rank {rank}: Quantizing int4 weights from bf16")

         def apply_quantization(_, weight):
-            return quantize_int4(weight, output_device=torch.device("cuda"))
+            return quantize_int4(weight, fp8_activation_scale_ub, output_device=torch.device("cuda"))

     else:
         fp8_scales_path = os.path.join(checkpoint_dir, f"fp8_scales_{rank}.pt")
```
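The visible change threads the existing `fp8_activation_scale_ub` value into `quantize_int4` as a second positional argument, rather than calling it with the weight alone. As a rough illustration only (not the llama-stack implementation, whose internals are not shown in this diff), here is a minimal sketch of an int4 weight quantizer with that argument order; everything below beyond the two parameters visible in the diff is an assumption:

```python
# Minimal sketch, NOT the llama-stack quantize_int4: all internals
# here are assumptions made for illustration.
from typing import Optional, Tuple

import torch


def quantize_int4_sketch(
    weight: torch.Tensor,
    fp8_activation_scale_ub: Optional[float] = None,
    output_device: Optional[torch.device] = None,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Symmetric per-row int4 quantization of a bf16/fp32 weight matrix.

    The scale upper bound is accepted so a caller can pass the same
    bound it uses for fp8 activation scales; here it merely clamps
    the weight scales (an assumed behavior, purely illustrative).
    """
    # Per-row max-abs scale; symmetric int4 covers [-8, 7].
    scale = weight.abs().amax(dim=-1, keepdim=True).float() / 7.0
    scale = scale.clamp(min=1e-8)  # avoid division by zero on all-zero rows
    if fp8_activation_scale_ub is not None:
        scale = scale.clamp(max=fp8_activation_scale_ub)
    q = torch.clamp(torch.round(weight.float() / scale), -8, 7).to(torch.int8)
    device = output_device if output_device is not None else weight.device
    return q.to(device), scale.to(device)
```

With a signature like this, the diff's new call shape, `quantize_int4(weight, fp8_activation_scale_ub, output_device=torch.device("cuda"))`, passes the bound positionally, which matches the hunk shown above.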