build: format codebase imports using ruff linter (#1028)

# What does this PR do?

- Configured ruff linter to automatically fix import sorting issues.
- Set --exit-non-zero-on-fix to ensure non-zero exit code when fixes are applied.
- Enabled the 'I' selection to focus on import-related linting rules.
- Ran the linter, and formatted all codebase imports accordingly.
- Removed the black dep from the "dev" group since we use ruff

Signed-off-by: Sébastien Han <seb@redhat.com>

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan

[Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*]

[//]: # (## Documentation)
[//]: # (- [ ] Added a Changelog entry if the change is significant)

Signed-off-by: Sébastien Han <seb@redhat.com>
2025-02-13 19:06:21 +01:00 · 2025-02-13 19:06:21 +01:00 · e4a1579e63
commit e4a1579e63
parent 1527c30107
140 changed files with 139 additions and 243 deletions
--- a/llama_stack/providers/inline/inference/meta_reference/config.py
+++ b/llama_stack/providers/inline/inference/meta_reference/config.py
@@ -9,7 +9,6 @@ from typing import Any, Dict, Optional
 from pydantic import BaseModel, field_validator

 from llama_stack.apis.inference import QuantizationConfig
-
 from llama_stack.providers.utils.inference import supported_inference_models


--- a/llama_stack/providers/inline/inference/meta_reference/generation.py
+++ b/llama_stack/providers/inline/inference/meta_reference/generation.py
@@ -37,7 +37,6 @@ from llama_models.llama3.reference_impl.multimodal.model import (
    CrossAttentionTransformer,
 )
 from llama_models.sku_list import resolve_model
-
 from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData
 from pydantic import BaseModel

@@ -47,7 +46,6 @@ from llama_stack.apis.inference import (
    ResponseFormat,
    ResponseFormatType,
 )
-
 from llama_stack.distribution.utils.model_utils import model_local_dir
 from llama_stack.providers.utils.inference.prompt_adapter import (
    ChatCompletionRequestWithRawContent,
--- a/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -46,8 +46,8 @@ from llama_stack.providers.utils.inference.embedding_mixin import (
    SentenceTransformerEmbeddingMixin,
 )
 from llama_stack.providers.utils.inference.model_registry import (
-    build_model_alias,
    ModelRegistryHelper,
+    build_model_alias,
 )
 from llama_stack.providers.utils.inference.prompt_adapter import (
    augment_content_with_response_format_prompt,
--- a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
+++ b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
@@ -22,16 +22,13 @@ from typing import Callable, Generator, Literal, Optional, Union

 import torch
 import zmq
-
 from fairscale.nn.model_parallel.initialize import (
    get_model_parallel_group,
    get_model_parallel_rank,
    get_model_parallel_src_rank,
 )
-
 from pydantic import BaseModel, Field
-
-from torch.distributed.launcher.api import elastic_launch, LaunchConfig
+from torch.distributed.launcher.api import LaunchConfig, elastic_launch
 from typing_extensions import Annotated

 from llama_stack.providers.utils.inference.prompt_adapter import (
--- a/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_impls.py
+++ b/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_impls.py
@@ -8,7 +8,6 @@
 # This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement.

 import collections
-
 import logging
 from typing import Optional, Type

@@ -23,7 +22,7 @@ except ImportError:
    raise

 import torch
-from torch import nn, Tensor
+from torch import Tensor, nn


 class Fp8ScaledWeights:
--- a/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_txest_disabled.py
+++ b/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_txest_disabled.py
@@ -10,9 +10,9 @@
 import unittest

 import torch
-
-from fp8_impls import ffn_swiglu_fp8_dynamic, FfnQuantizeMode, quantize_fp8
-from hypothesis import given, settings, strategies as st
+from fp8_impls import FfnQuantizeMode, ffn_swiglu_fp8_dynamic, quantize_fp8
+from hypothesis import given, settings
+from hypothesis import strategies as st
 from torch import Tensor


--- a/llama_stack/providers/inline/inference/meta_reference/quantization/loader.py
+++ b/llama_stack/providers/inline/inference/meta_reference/quantization/loader.py
@@ -12,18 +12,13 @@ import os
 from typing import Any, Dict, List, Optional

 import torch
-
 from fairscale.nn.model_parallel.layers import ColumnParallelLinear, RowParallelLinear
 from fairscale.nn.model_parallel.mappings import reduce_from_model_parallel_region
-
 from llama_models.datatypes import CheckpointQuantizationFormat
-
 from llama_models.llama3.api.args import ModelArgs
 from llama_models.llama3.reference_impl.model import Transformer, TransformerBlock
 from llama_models.sku_list import resolve_model
-
-from torch import nn, Tensor
-
+from torch import Tensor, nn
 from torchao.quantization.GPTQ import Int8DynActInt4WeightLinear

 from llama_stack.apis.inference import QuantizationType
--- a/llama_stack/providers/inline/inference/meta_reference/quantization/scripts/quantize_checkpoint.py
+++ b/llama_stack/providers/inline/inference/meta_reference/quantization/scripts/quantize_checkpoint.py
@@ -16,14 +16,12 @@ from pathlib import Path
 from typing import Optional

 import fire
-
 import torch
 from fairscale.nn.model_parallel.initialize import (
    get_model_parallel_rank,
    initialize_model_parallel,
    model_parallel_is_initialized,
 )
-
 from llama_models.llama3.api.args import ModelArgs
 from llama_models.llama3.api.tokenizer import Tokenizer
 from llama_models.llama3.reference_impl.model import Transformer, TransformerBlock