fix: Mirror llama4 rope scaling fixes, small model simplify (#1917)
See:
- https://github.com/meta-llama/llama-models/pull/322
- https://github.com/meta-llama/llama-models/pull/320
parent 770b38f8b5
commit e2299291c4
2 changed files with 36 additions and 28 deletions
@@ -70,6 +70,9 @@ class ModelArgs(BaseModel):
     attention_chunk_size: Optional[int] = None
     rope_theta: float = 500000
     use_scaled_rope: bool = False
+    rope_scaling_factor: Optional[float] = None
+    rope_high_freq_factor: Optional[float] = None
+
     nope_layer_interval: Optional[int] = None  # No position encoding in every n layers
     use_qk_norm: bool = False
     # Set to True to enable inference-time temperature tuning (useful for very long context)
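For context, rope_scaling_factor and rope_high_freq_factor are the knobs of the llama3-style long-context frequency rescaling that the companion llama-models PRs parametrize. Below is a minimal sketch of how such scaling is typically applied to the RoPE base frequencies; the function name, the low_freq_factor and old_context_len constants, and the example values are illustrative assumptions, not code from this commit.

import math

import torch


def apply_rope_scaling_sketch(freqs: torch.Tensor, scale_factor: float, high_freq_factor: float) -> torch.Tensor:
    # Rough llama3-style "NTK-by-parts" rescaling; constants below are assumptions.
    low_freq_factor = 1.0
    old_context_len = 8192  # assumed original training context length
    low_freq_wavelen = old_context_len / low_freq_factor
    high_freq_wavelen = old_context_len / high_freq_factor

    new_freqs = []
    for freq in freqs.tolist():
        wavelen = 2 * math.pi / freq
        if wavelen > low_freq_wavelen:
            # Low-frequency components are slowed down by the scale factor.
            new_freqs.append(freq / scale_factor)
        elif wavelen <= high_freq_wavelen:
            # High-frequency components are left untouched.
            new_freqs.append(freq)
        else:
            # Smoothly interpolate between the two regimes.
            smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
            new_freqs.append((1 - smooth) * freq / scale_factor + smooth * freq)
    return torch.tensor(new_freqs, dtype=freqs.dtype, device=freqs.device)


# Example: head_dim is illustrative; rope_theta=500000 and the 16 / 1 defaults come from the diff.
head_dim = 128
freqs = 1.0 / (500000 ** (torch.arange(0, head_dim, 2).float() / head_dim))
scaled = apply_rope_scaling_sketch(freqs, scale_factor=16.0, high_freq_factor=1.0)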
@@ -92,4 +95,14 @@ class ModelArgs(BaseModel):
                 f"n_heads ({self.n_heads}) must be divisible by n_kv_heads ({self.n_kv_heads})"
             )
         assert self.dim % self.n_heads == 0, f"dim ({self.dim}) must be divisible by n_heads ({self.n_heads})"
+
+        if self.use_scaled_rope:
+            # NOTE: ideally these values should have come from params.json. However, we have
+            # shipped the models everywhere. Only Llama-4-Scout uses scaled rope and needs these
+            # specific values.
+            if self.rope_scaling_factor is None:
+                self.rope_scaling_factor = 16
+            if self.rope_high_freq_factor is None:
+                self.rope_high_freq_factor = 1
+
         return self
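To see the defaulting behavior this validator adds, here is a small self-contained sketch that mirrors only the rope-related fields from the diff above; RopeArgsSketch is a toy stand-in for illustration, not the real ModelArgs.

from typing import Optional

from pydantic import BaseModel, model_validator


class RopeArgsSketch(BaseModel):
    # Toy stand-in for the rope-related fields of ModelArgs (not the real class).
    rope_theta: float = 500000
    use_scaled_rope: bool = False
    rope_scaling_factor: Optional[float] = None
    rope_high_freq_factor: Optional[float] = None

    @model_validator(mode="after")
    def fill_scaled_rope_defaults(self):
        # Mirrors the commit: only backfill defaults when scaled rope is enabled.
        if self.use_scaled_rope:
            if self.rope_scaling_factor is None:
                self.rope_scaling_factor = 16
            if self.rope_high_freq_factor is None:
                self.rope_high_freq_factor = 1
        return self


print(RopeArgsSketch(use_scaled_rope=True).rope_scaling_factor)   # 16
print(RopeArgsSketch(use_scaled_rope=False).rope_scaling_factor)  # None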