chore: enable pyupgrade fixes (#1806)

# What does this PR do?

The goal of this PR is code base modernization.

Schema reflection code needed a minor adjustment to handle UnionTypes
and collections.abc.AsyncIterator. (Both are preferred for latest Python
releases.)

Note to reviewers: almost all changes here are automatically generated
by pyupgrade. Some additional unused imports were cleaned up. The only
change worth of note can be found under `docs/openapi_generator` and
`llama_stack/strong_typing/schema.py` where reflection code was updated
to deal with "newer" types.

Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
This commit is contained in:
Ihar Hrachyshka 2025-05-01 17:23:50 -04:00 committed by GitHub
parent ffe3d0b2cd
commit 9e6561a1ec
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
319 changed files with 2843 additions and 3033 deletions

View file

@ -5,7 +5,6 @@
# the root directory of this source tree.
from enum import Enum
from typing import Optional
from pydantic import BaseModel, model_validator
@ -15,8 +14,8 @@ class QuantizationScheme(Enum):
class QuantizationArgs(BaseModel):
scheme: Optional[QuantizationScheme] = None
group_size: Optional[int] = None
scheme: QuantizationScheme | None = None
group_size: int | None = None
spinquant: bool = False
@ -58,32 +57,32 @@ class ModelArgs(BaseModel):
dim: int = -1
n_layers: int = -1
n_heads: int = -1
n_kv_heads: Optional[int] = None
head_dim: Optional[int] = None
n_kv_heads: int | None = None
head_dim: int | None = None
vocab_size: int = -1
multiple_of: int = 256 # make SwiGLU hidden layer size multiple of large power of 2
ffn_dim_multiplier: Optional[float] = None
ffn_exp: Optional[float] = None
ffn_dim_multiplier: float | None = None
ffn_exp: float | None = None
norm_eps: float = 1e-5
attention_chunk_size: Optional[int] = None
attention_chunk_size: int | None = None
rope_theta: float = 500000
use_scaled_rope: bool = False
rope_scaling_factor: Optional[float] = None
rope_high_freq_factor: Optional[float] = None
rope_scaling_factor: float | None = None
rope_high_freq_factor: float | None = None
nope_layer_interval: Optional[int] = None # No position encoding in every n layers
nope_layer_interval: int | None = None # No position encoding in every n layers
use_qk_norm: bool = False
# Set to True to enable inference-time temperature tuning (useful for very long context)
attn_temperature_tuning: bool = False
floor_scale: float = 8192.0
attn_scale: float = 0.1
vision_args: Optional[VisionArgs] = None
moe_args: Optional[MoEArgs] = None
quantization_args: Optional[QuantizationArgs] = None
lora_args: Optional[LoRAArgs] = None
vision_args: VisionArgs | None = None
moe_args: MoEArgs | None = None
quantization_args: QuantizationArgs | None = None
lora_args: LoRAArgs | None = None
max_batch_size: int = 32
max_seq_len: int = 2048