build: configure ruff from pyproject.toml (#1100)

# What does this PR do?

- Remove hardcoded configurations from pre-commit.
- Allow configuration to be set via pyproject.toml.
- Merge .ruff.toml settings into pyproject.toml.
- Ensure the linter and formatter use the defined configuration instead
of being overridden by pre-commit.

Signed-off-by: Sébastien Han <seb@redhat.com>

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
[Describe the tests you ran to verify your changes with result
summaries. *Provide clear instructions so the plan can be easily
re-executed.*]

[//]: # (## Documentation)

Signed-off-by: Sébastien Han <seb@redhat.com>
This commit is contained in:
Sébastien Han 2025-02-14 18:01:57 +01:00 committed by GitHub
parent a3cb039e83
commit c0ee512980
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 78 additions and 62 deletions

View file

@@ -29,13 +29,7 @@ repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.4
hooks:
# Run the linter with import sorting.
- id: ruff
args: [
--fix,
--exit-non-zero-on-fix,
--select, I,
]
- id: ruff-format
- repo: https://github.com/adamchainz/blacken-docs

View file

@@ -1,37 +0,0 @@
# Suggested config from pytorch that we can adapt
lint.select = ["B", "C", "E" , "F" , "N", "W", "B9"]
line-length = 120
# C408 ignored because we like the dict keyword argument syntax
# E501 is not flexible enough, we're using B950 instead
# N812 ignored because import torch.nn.functional as F is PyTorch convention
# N817 ignored because importing using acronyms is convention (DistributedDataParallel as DDP)
# E731 allow usage of assigning lambda expressions
# E701 let black auto-format statements on one line
# E704 let black auto-format statements on one line
lint.ignore = [
"E203", "E305", "E402", "E501", "E721", "E741", "F405", "F821", "F841",
"C408", "E302", "W291", "E303", "N812", "N817", "E731", "E701",
# These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later.
"C901", "C405", "C414", "N803", "N999", "C403", "C416", "B028", "C419", "C401", "B023",
# shebang has extra meaning in fbcode lints, so I think it's not worth trying
# to line this up with executable bit
"EXE001",
# random naming hints don't need
"N802",
# these ignores are from flake8-bugbear; please fix!
"B007", "B008"
]
exclude = [
"./.git",
"./docs/*",
"./build",
"./scripts",
"./venv",
"*.pyi",
".pre-commit-config.yaml",
"*.md",
".flake8"
]

View file

@@ -26,13 +26,13 @@ def format_row(row, col_widths):
lines.extend(textwrap.wrap(line, width, break_long_words=False, replace_whitespace=False))
return lines
wrapped = [wrap(item, width) for item, width in zip(row, col_widths)]
wrapped = [wrap(item, width) for item, width in zip(row, col_widths, strict=False)]
max_lines = max(len(subrow) for subrow in wrapped)
lines = []
for i in range(max_lines):
line = []
for cell_lines, width in zip(wrapped, col_widths):
for cell_lines, width in zip(wrapped, col_widths, strict=False):
value = cell_lines[i] if i < len(cell_lines) else ""
line.append(value + " " * (width - len(strip_ansi_colors(value))))
lines.append("| " + (" | ".join(line)) + " |")
@@ -50,14 +50,14 @@ def print_table(rows, headers=None, separate_rows: bool = False, sort_by: Iterab
rows.sort(key=lambda x: tuple(x[i] for i in sort_by))
if not headers:
col_widths = [max(itemlen(item) for item in col) for col in zip(*rows)]
col_widths = [max(itemlen(item) for item in col) for col in zip(*rows, strict=False)]
else:
col_widths = [
max(
itemlen(header),
max(itemlen(item) for item in col),
)
for header, col in zip(headers, zip(*rows))
for header, col in zip(headers, zip(*rows, strict=False), strict=False)
]
col_widths = [min(w, 80) for w in col_widths]

View file

@@ -41,7 +41,7 @@ class ShieldRunnerMixin:
for identifier in identifiers
]
)
for identifier, response in zip(identifiers, responses):
for identifier, response in zip(identifiers, responses, strict=False):
if not response.violation:
continue

View file

@@ -201,7 +201,9 @@ class MetaReferenceEvalImpl(
raise ValueError(f"Invalid candidate type: {candidate.type}")
# scoring with generated_answer
score_input_rows = [input_r | generated_r for input_r, generated_r in zip(input_rows, generations)]
score_input_rows = [
input_r | generated_r for input_r, generated_r in zip(input_rows, generations, strict=False)
]
if task_config.scoring_params is not None:
scoring_functions_dict = {

View file

@@ -83,12 +83,6 @@ import sys as _sys
from contextlib import ( # noqa
contextmanager as _contextmanager,
)
from contextlib import (
redirect_stderr as _redirect_stderr,
)
from contextlib import (
redirect_stdout as _redirect_stdout,
)
from multiprocessing.connection import Connection as _Connection
# Mangle imports to avoid polluting model execution namespace.

View file

@@ -118,7 +118,7 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime):
return RAGQueryResult(content=None)
# sort by score
chunks, scores = zip(*sorted(zip(chunks, scores), key=lambda x: x[1], reverse=True))
chunks, scores = zip(*sorted(zip(chunks, scores, strict=False), key=lambda x: x[1], reverse=True), strict=False)
tokens = 0
picked = []

View file

@@ -103,7 +103,7 @@ class FaissIndex(EmbeddingIndex):
chunks = []
scores = []
for d, i in zip(distances[0], indices[0]):
for d, i in zip(distances[0], indices[0], strict=False):
if i < 0:
continue
chunks.append(self.chunk_by_index[int(i)])

View file

@@ -80,7 +80,7 @@ class SQLiteVecIndex(EmbeddingIndex):
try:
# Start transaction
cur.execute("BEGIN TRANSACTION")
for chunk, emb in zip(chunks, embeddings):
for chunk, emb in zip(chunks, embeddings, strict=False):
# Serialize and insert the chunk metadata.
chunk_json = chunk.model_dump_json()
cur.execute(f"INSERT INTO {self.metadata_table} (chunk) VALUES (?)", (chunk_json,))

View file

@@ -69,7 +69,7 @@ class ChromaIndex(EmbeddingIndex):
chunks = []
scores = []
for dist, doc in zip(distances, documents):
for dist, doc in zip(distances, documents, strict=False):
try:
doc = json.loads(doc)
chunk = Chunk(**doc)

View file

@@ -55,7 +55,7 @@ class QdrantIndex(EmbeddingIndex):
)
points = []
for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
for i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=False)):
chunk_id = f"{chunk.metadata['document_id']}:chunk-{i}"
points.append(
PointStruct(

View file

@@ -88,7 +88,7 @@ class TestVisionModelInference:
expected_strings_to_check = [
["puppy"],
]
for image, expected_strings in zip(images, expected_strings_to_check):
for image, expected_strings in zip(images, expected_strings_to_check, strict=False):
response = [
r
async for r in await inference_impl.chat_completion(

View file

@@ -132,7 +132,7 @@ def convert_openai_completion_logprobs(
if logprobs.tokens and logprobs.token_logprobs:
return [
TokenLogProbs(logprobs_by_token={token: token_lp})
for token, token_lp in zip(logprobs.tokens, logprobs.token_logprobs)
for token, token_lp in zip(logprobs.tokens, logprobs.token_logprobs, strict=False)
]
return None

View file

@@ -76,3 +76,66 @@ license-files = []
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true
[tool.ruff]
line-length = 120
exclude = [
"./.git",
"./docs/*",
"./build",
"./scripts",
"./venv",
"*.pyi",
".pre-commit-config.yaml",
"*.md",
".flake8",
]
[tool.ruff.lint]
select = [
"B", # flake8-bugbear
"B9", # flake8-bugbear subset
"C", # comprehensions
"E", # pycodestyle
"F", # Pyflakes
"N", # Naming
"W", # Warnings
"I", # isort
]
ignore = [
"E203",
"E305",
"E402",
"E501", # line too long
"E721",
"E741",
"F405",
"F821",
"F841",
"C408", # ignored because we like the dict keyword argument syntax
"E302",
"W291",
"E303",
"N812", # ignored because import torch.nn.functional as F is PyTorch convention
"N817", # ignored because importing using acronyms is convention (DistributedDataParallel as DDP)
"E731", # allow usage of assigning lambda expressions
# These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later.
"C901",
"C405",
"C414",
"N803",
"N999",
"C403",
"C416",
"B028",
"C419",
"C401",
"B023",
# shebang has extra meaning in fbcode lints, so I think it's not worth trying
# to line this up with executable bit
"EXE001",
"N802", # random naming hints don't need
# these ignores are from flake8-bugbear; please fix!
"B007",
"B008",
]