fix: Mirror llama4 rope scaling fixes, small model simplify (#1917)

See:
- https://github.com/meta-llama/llama-models/pull/322
- https://github.com/meta-llama/llama-models/pull/320
Ashwin Bharambe 2025-04-09 11:28:45 -07:00 committed by GitHub
parent 770b38f8b5
commit e2299291c4
2 changed files with 36 additions and 28 deletions


@@ -70,6 +70,9 @@ class ModelArgs(BaseModel):
    attention_chunk_size: Optional[int] = None
    rope_theta: float = 500000
    use_scaled_rope: bool = False
    rope_scaling_factor: Optional[float] = None
    rope_high_freq_factor: Optional[float] = None
    nope_layer_interval: Optional[int] = None  # No position encoding in every n layers
    use_qk_norm: bool = False
    # Set to True to enable inference-time temperature tuning (useful for very long context)
@@ -92,4 +95,14 @@ class ModelArgs(BaseModel):
            f"n_heads ({self.n_heads}) must be divisible by n_kv_heads ({self.n_kv_heads})"
        )
        assert self.dim % self.n_heads == 0, f"dim ({self.dim}) must be divisible by n_heads ({self.n_heads})"
        if self.use_scaled_rope:
            # NOTE: ideally these values should have come from params.json. However, we have
            # shipped the models everywhere. Only Llama-4-Scout uses scaled rope and needs these
            # specific values.
            if self.rope_scaling_factor is None:
                self.rope_scaling_factor = 16
            if self.rope_high_freq_factor is None:
                self.rope_high_freq_factor = 1
        return self
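
Taken together: a config with use_scaled_rope=True that leaves both fields unset ends up with the Scout values (16 and 1), which would then feed the frequency scaling, e.g. via the sketch after the first hunk. The base frequencies and head_dim below are illustrative assumptions, not values from this diff; only rope_theta=500000 comes from the code above.

head_dim = 128  # assumption, not from this diff
base_freqs = [500000.0 ** (-2 * i / head_dim) for i in range(head_dim // 2)]
scaled_freqs = apply_rope_scaling_sketch(
    base_freqs,
    rope_scaling_factor=16,   # default filled in by the validator above
    rope_high_freq_factor=1,  # default filled in by the validator above
)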