build: configure ruff from pyproject.toml (#1100)

# What does this PR do?

- Remove hardcoded configurations from pre-commit.
- Allow configuration to be set via pyproject.toml.
- Merge .ruff.toml settings into pyproject.toml.
- Ensure the linter and formatter use the defined configuration instead
of being overridden by pre-commit.

Signed-off-by: Sébastien Han <seb@redhat.com>

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
[Describe the tests you ran to verify your changes with result
summaries. *Provide clear instructions so the plan can be easily
re-executed.*]

[//]: # (## Documentation)

Signed-off-by: Sébastien Han <seb@redhat.com>
This commit is contained in:
Sébastien Han 2025-02-14 18:01:57 +01:00 committed by GitHub
parent a3cb039e83
commit c0ee512980
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 78 additions and 62 deletions

View file

@@ -29,13 +29,7 @@ repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.4
hooks:
# Run the linter with import sorting.
- id: ruff
args: [
--fix,
--exit-non-zero-on-fix,
--select, I,
]
- id: ruff-format
- repo: https://github.com/adamchainz/blacken-docs

View file

@@ -1,37 +0,0 @@
# Suggested config from pytorch that we can adapt
lint.select = ["B", "C", "E" , "F" , "N", "W", "B9"]
line-length = 120
# C408 ignored because we like the dict keyword argument syntax
# E501 is not flexible enough, we're using B950 instead
# N812 ignored because import torch.nn.functional as F is PyTorch convention
# N817 ignored because importing using acronyms is convention (DistributedDataParallel as DDP)
# E731 allow usage of assigning lambda expressions
# E701 let black auto-format statements on one line
# E704 let black auto-format statements on one line
lint.ignore = [
"E203", "E305", "E402", "E501", "E721", "E741", "F405", "F821", "F841",
"C408", "E302", "W291", "E303", "N812", "N817", "E731", "E701",
# These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later.
"C901", "C405", "C414", "N803", "N999", "C403", "C416", "B028", "C419", "C401", "B023",
# shebang has extra meaning in fbcode lints, so I think it's not worth trying
# to line this up with executable bit
"EXE001",
# random naming hints don't need
"N802",
# these ignores are from flake8-bugbear; please fix!
"B007", "B008"
]
exclude = [
"./.git",
"./docs/*",
"./build",
"./scripts",
"./venv",
"*.pyi",
".pre-commit-config.yaml",
"*.md",
".flake8"
]

View file

@@ -26,13 +26,13 @@ def format_row(row, col_widths):
lines.extend(textwrap.wrap(line, width, break_long_words=False, replace_whitespace=False))
return lines
wrapped = [wrap(item, width) for item, width in zip(row, col_widths)]
wrapped = [wrap(item, width) for item, width in zip(row, col_widths, strict=False)]
max_lines = max(len(subrow) for subrow in wrapped)
lines = []
for i in range(max_lines):
line = []
for cell_lines, width in zip(wrapped, col_widths):
for cell_lines, width in zip(wrapped, col_widths, strict=False):
value = cell_lines[i] if i < len(cell_lines) else ""
line.append(value + " " * (width - len(strip_ansi_colors(value))))
lines.append("| " + (" | ".join(line)) + " |")
@@ -50,14 +50,14 @@ def print_table(rows, headers=None, separate_rows: bool = False, sort_by: Iterab
rows.sort(key=lambda x: tuple(x[i] for i in sort_by))
if not headers:
col_widths = [max(itemlen(item) for item in col) for col in zip(*rows)]
col_widths = [max(itemlen(item) for item in col) for col in zip(*rows, strict=False)]
else:
col_widths = [
max(
itemlen(header),
max(itemlen(item) for item in col),
)
for header, col in zip(headers, zip(*rows))
for header, col in zip(headers, zip(*rows, strict=False), strict=False)
]
col_widths = [min(w, 80) for w in col_widths]

View file

@@ -41,7 +41,7 @@ class ShieldRunnerMixin:
for identifier in identifiers
]
)
for identifier, response in zip(identifiers, responses):
for identifier, response in zip(identifiers, responses, strict=False):
if not response.violation:
continue

View file

@@ -201,7 +201,9 @@ class MetaReferenceEvalImpl(
raise ValueError(f"Invalid candidate type: {candidate.type}")
# scoring with generated_answer
score_input_rows = [input_r | generated_r for input_r, generated_r in zip(input_rows, generations)]
score_input_rows = [
input_r | generated_r for input_r, generated_r in zip(input_rows, generations, strict=False)
]
if task_config.scoring_params is not None:
scoring_functions_dict = {

View file

@@ -83,12 +83,6 @@ import sys as _sys
from contextlib import ( # noqa
contextmanager as _contextmanager,
)
from contextlib import (
redirect_stderr as _redirect_stderr,
)
from contextlib import (
redirect_stdout as _redirect_stdout,
)
from multiprocessing.connection import Connection as _Connection
# Mangle imports to avoid polluting model execution namespace.

View file

@@ -118,7 +118,7 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime):
return RAGQueryResult(content=None)
# sort by score
chunks, scores = zip(*sorted(zip(chunks, scores), key=lambda x: x[1], reverse=True))
chunks, scores = zip(*sorted(zip(chunks, scores, strict=False), key=lambda x: x[1], reverse=True), strict=False)
tokens = 0
picked = []

View file

@@ -103,7 +103,7 @@ class FaissIndex(EmbeddingIndex):
chunks = []
scores = []
for d, i in zip(distances[0], indices[0]):
for d, i in zip(distances[0], indices[0], strict=False):
if i < 0:
continue
chunks.append(self.chunk_by_index[int(i)])

View file

@@ -80,7 +80,7 @@ class SQLiteVecIndex(EmbeddingIndex):
try:
# Start transaction
cur.execute("BEGIN TRANSACTION")
for chunk, emb in zip(chunks, embeddings):
for chunk, emb in zip(chunks, embeddings, strict=False):
# Serialize and insert the chunk metadata.
chunk_json = chunk.model_dump_json()
cur.execute(f"INSERT INTO {self.metadata_table} (chunk) VALUES (?)", (chunk_json,))

View file

@@ -69,7 +69,7 @@ class ChromaIndex(EmbeddingIndex):
chunks = []
scores = []
for dist, doc in zip(distances, documents):
for dist, doc in zip(distances, documents, strict=False):
try:
doc = json.loads(doc)
chunk = Chunk(**doc)

View file

@@ -55,7 +55,7 @@ class QdrantIndex(EmbeddingIndex):
)
points = []
for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
for i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=False)):
chunk_id = f"{chunk.metadata['document_id']}:chunk-{i}"
points.append(
PointStruct(

View file

@@ -88,7 +88,7 @@ class TestVisionModelInference:
expected_strings_to_check = [
["puppy"],
]
for image, expected_strings in zip(images, expected_strings_to_check):
for image, expected_strings in zip(images, expected_strings_to_check, strict=False):
response = [
r
async for r in await inference_impl.chat_completion(

View file

@@ -132,7 +132,7 @@ def convert_openai_completion_logprobs(
if logprobs.tokens and logprobs.token_logprobs:
return [
TokenLogProbs(logprobs_by_token={token: token_lp})
for token, token_lp in zip(logprobs.tokens, logprobs.token_logprobs)
for token, token_lp in zip(logprobs.tokens, logprobs.token_logprobs, strict=False)
]
return None

View file

@@ -76,3 +76,66 @@ license-files = []
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true
[tool.ruff]
line-length = 120
exclude = [
"./.git",
"./docs/*",
"./build",
"./scripts",
"./venv",
"*.pyi",
".pre-commit-config.yaml",
"*.md",
".flake8",
]
[tool.ruff.lint]
select = [
"B", # flake8-bugbear
"B9", # flake8-bugbear subset
"C", # comprehensions
"E", # pycodestyle
"F", # Pyflakes
"N", # Naming
"W", # Warnings
"I", # isort
]
ignore = [
"E203",
"E305",
"E402",
"E501", # line too long
"E721",
"E741",
"F405",
"F821",
"F841",
"C408", # ignored because we like the dict keyword argument syntax
"E302",
"W291",
"E303",
"N812", # ignored because import torch.nn.functional as F is PyTorch convention
"N817", # ignored because importing using acronyms is convention (DistributedDataParallel as DDP)
"E731", # allow usage of assigning lambda expressions
# These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later.
"C901",
"C405",
"C414",
"N803",
"N999",
"C403",
"C416",
"B028",
"C419",
"C401",
"B023",
# shebang has extra meaning in fbcode lints, so I think it's not worth trying
# to line this up with executable bit
"EXE001",
"N802", # random naming hints don't need
# these ignores are from flake8-bugbear; please fix!
"B007",
"B008",
]