Fix and unignore ruff rules DTZ005, FBT001 and FBT002

janosh · janosh · commit 0249327b6b5a · 2024-05-15T15:22:34.000-04:00
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -8,7 +8,7 @@ default_install_hook_types: [pre-commit, commit-msg]
 
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.4.2
+    rev: v0.4.4
     hooks:
       - id: ruff
         args: [--fix]
@@ -57,7 +57,7 @@ repos:
         exclude: ^(site/src/figs/.+\.svelte|data/wbm/20.+\..+|site/src/(routes|figs).+\.(yaml|json)|changelog.md)$
 
   - repo: https://github.com/pre-commit/mirrors-eslint
-    rev: v9.1.1
+    rev: v9.2.0
     hooks:
       - id: eslint
         types: [file]
@@ -71,15 +71,15 @@ repos:
           - typescript-eslint
 
   - repo: https://github.com/python-jsonschema/check-jsonschema
-    rev: 0.28.2
+    rev: 0.28.3
     hooks:
       - id: check-jsonschema
         files: ^models/(.+)/\1.*\.yml$
         args: [--schemafile, tests/model-schema.yml]
       - id: check-github-actions
 
   - repo: https://github.com/RobertCraigie/pyright-python
-    rev: v1.1.360
+    rev: v1.1.363
     hooks:
       - id: pyright
         args: [--level, error]
diff --git a/matbench_discovery/__init__.py b/matbench_discovery/__init__.py
@@ -3,7 +3,7 @@
 import json
 import os
 import warnings
-from datetime import datetime
+from datetime import UTC, datetime
 from importlib.metadata import Distribution
 
 import matplotlib.pyplot as plt
@@ -43,7 +43,7 @@
 # threshold on hull distance for a material to be considered stable
 STABILITY_THRESHOLD = 0
 
-timestamp = f"{datetime.now():%Y-%m-%d@%H-%M-%S}"
+timestamp = f"{datetime.now(tz=UTC):%Y-%m-%d@%H-%M-%S}"
 today = timestamp.split("@")[0]
 
 # filter pymatgen warnings that spam the logs when e.g. applying corrections to
diff --git a/matbench_discovery/data.py b/matbench_discovery/data.py
@@ -51,6 +51,7 @@ def as_dict_handler(obj: Any) -> dict[str, Any] | None:
 
 def load(
     key: str,
+    *,
     version: str = figshare_versions[-1],
     cache_dir: str | Path = default_cache_dir,
     hydrate: bool = False,
@@ -149,6 +150,7 @@ def load(
 
 def glob_to_df(
     pattern: str,
+    *,
     reader: Callable[[Any], pd.DataFrame] | None = None,
     pbar: bool = True,
     **kwargs: Any,
diff --git a/matbench_discovery/energy.py b/matbench_discovery/energy.py
@@ -16,7 +16,7 @@
 
 
 def get_elemental_ref_entries(
-    entries: Sequence[EntryLike], verbose: bool = True
+    entries: Sequence[EntryLike], *, verbose: bool = True
 ) -> dict[str, Entry]:
     """Get the lowest energy pymatgen Entry for each element in a list of entries.
 
diff --git a/matbench_discovery/enums.py b/matbench_discovery/enums.py
@@ -7,7 +7,7 @@
 
 
 class LabelEnum(StrEnum):
-    """StrEnum with optional label and description attributes plus dict() method."""
+    """StrEnum with optional label and description attributes plus dict() methods."""
 
     def __new__(
         cls, val: str, label: str | None = None, desc: str | None = None
@@ -153,7 +153,7 @@ class Open(LabelEnum):
 
 @unique
 class TestSubset(LabelEnum):
-    """Test set subsets."""
+    """Which subset of the test data to use for evaluation."""
 
     uniq_protos = "uniq_protos", "Unique Structure Prototypes"
     ten_k_most_stable = "10k_most_stable", "10k Most Stable"
diff --git a/matbench_discovery/metrics.py b/matbench_discovery/metrics.py
@@ -17,6 +17,7 @@
 def classify_stable(
     e_above_hull_true: pd.Series,
     e_above_hull_pred: pd.Series,
+    *,
     stability_threshold: float | None = 0,
     fillna: bool = True,
 ) -> tuple[pd.Series, pd.Series, pd.Series, pd.Series]:
@@ -69,6 +70,7 @@ def classify_stable(
 def stable_metrics(
     each_true: Sequence[float],
     each_pred: Sequence[float],
+    *,
     stability_threshold: float = STABILITY_THRESHOLD,
     fillna: bool = True,
 ) -> dict[str, float]:
@@ -95,7 +97,10 @@ def stable_metrics(
             Recall, Accuracy, F1, TPR, FPR, TNR, FNR, MAE, RMSE, R2.
     """
     n_true_pos, n_false_neg, n_false_pos, n_true_neg = map(
-        sum, classify_stable(each_true, each_pred, stability_threshold, fillna)
+        sum,
+        classify_stable(
+            each_true, each_pred, stability_threshold=stability_threshold, fillna=fillna
+        ),
     )
 
     n_total_pos = n_true_pos + n_false_neg
diff --git a/matbench_discovery/plots.py b/matbench_discovery/plots.py
@@ -108,7 +108,9 @@ def hist_classified_stable_vs_hull_dist(
         df.groupby(kwargs["facet_col"]) if "facet_col" in kwargs else [(None, df)]
     ):
         true_pos, false_neg, false_pos, true_neg = classify_stable(
-            df_group[each_true_col], df_group[each_pred_col], stability_threshold
+            df_group[each_true_col],
+            df_group[each_pred_col],
+            stability_threshold=stability_threshold,
         )
 
         # switch between hist of DFT-computed and model-predicted convex hull distance
@@ -264,6 +266,7 @@ def hist_classified_stable_vs_hull_dist(
 def rolling_mae_vs_hull_dist(
     e_above_hull_true: pd.Series,
     e_above_hull_preds: pd.DataFrame | dict[str, pd.Series],
+    *,
     df_rolling_err: pd.DataFrame | None = None,
     df_err_std: pd.DataFrame | None = None,
     window: float = 0.04,
@@ -567,6 +570,7 @@ def rolling_mae_vs_hull_dist(
 def cumulative_metrics(
     e_above_hull_true: pd.Series,
     df_preds: pd.DataFrame,
+    *,
     metrics: Sequence[str] = ("Precision", "Recall"),
     stability_threshold: float = 0,  # set stability threshold as distance to convex
     # hull in eV / atom, usually 0 or 0.1 eV
@@ -635,7 +639,10 @@ def cumulative_metrics(
         each_true = e_above_hull_true.loc[each_pred.index]
 
         true_pos_cum, false_neg_cum, false_pos_cum, _true_neg_cum = map(
-            np.cumsum, classify_stable(each_true, each_pred, stability_threshold)
+            np.cumsum,
+            classify_stable(
+                each_true, each_pred, stability_threshold=stability_threshold
+            ),
         )
 
         # precision aka positive predictive value (PPV)
diff --git a/matbench_discovery/preds.py b/matbench_discovery/preds.py
@@ -66,6 +66,7 @@ class PredFiles(Files):
 
 
 def load_df_wbm_with_preds(
+    *,
     models: Sequence[str] = (*PRED_FILES,),
     pbar: bool = True,
     id_col: str = Key.mat_id,
diff --git a/models/alignn/train_alignn.py b/models/alignn/train_alignn.py
@@ -95,6 +95,7 @@
 
 def df_to_loader(
     df: pd.DataFrame,
+    *,
     batch_size: int = 128,
     line_graph: bool = True,
     pin_memory: bool = False,
diff --git a/models/cgcnn/plot_structure_perturbation.py b/models/cgcnn/plot_structure_perturbation.py
@@ -33,6 +33,6 @@
 
 # %%
 fig, axs = plt.subplots(3, 4, figsize=(12, 10))
-for idx, ax in enumerate(axs.flat, 1):
+for idx, ax in enumerate(axs.flat, start=1):
     plot_structure_2d(perturb_structure(struct), ax=ax)
     ax.set(title=f"perturbation {idx}")
diff --git a/models/chgnet/analyze_chgnet.py b/models/chgnet/analyze_chgnet.py
@@ -79,7 +79,7 @@
 struct_col = Key.init_struct
 
 fig.suptitle(f"{n_struct} {struct_col} {title}", fontsize=16, fontweight="bold", y=1.05)
-for idx, row in enumerate(df_cse.loc[df_diff.index].itertuples(), 1):
+for idx, row in enumerate(df_cse.loc[df_diff.index].itertuples(), start=1):
     struct = Structure.from_dict(getattr(row, struct_col))
     ax = plot_structure_2d(struct, ax=axs.flat[idx - 1])
     _, spg_num = struct.get_space_group_info()
diff --git a/pyproject.toml b/pyproject.toml
@@ -90,14 +90,10 @@ ignore = [
   "C408",    # unnecessary-collection-call
   "C901",
   "COM812",
-  "D100",    # undocumented-public-module
   "D205",    # blank-line-after-summary
-  "DTZ005",
   "E731",    # lambda-assignment
   "EM101",
   "EM102",
-  "FBT001",
-  "FBT002",
   "FIX002",
   "INP001",
   "ISC001",
@@ -125,9 +121,9 @@ isort.known-third-party = ["wandb"]
 isort.split-on-trailing-comma = false
 
 [tool.ruff.lint.per-file-ignores]
-"tests/*" = ["D", "S101"]
-"matbench_discovery/plots.py" = ["ERA001"] # allow commented out code
-"matbench_discovery/preds.py" = ["ERA001"] # allow commented out code
+"tests/*" = ["D", "FBT001", "FBT002", "S101"]
+"matbench_discovery/plots.py" = ["ERA001"]    # allow commented out code
+"matbench_discovery/preds.py" = ["ERA001"]    # allow commented out code
 "scripts/*" = ["D", "ERA001", "S101"]
 "models/*" = ["D", "ERA001", "S101"]
 "data/*" = ["ERA001", "S101"]
diff --git a/scripts/analyze_model_failure_cases.py b/scripts/analyze_model_failure_cases.py
@@ -51,7 +51,7 @@
     )
     fig.suptitle(title, fontsize=20, fontweight="bold", y=1.05)
 
-    for idx, (mat_id, error) in enumerate(errors.items(), 1):
+    for idx, (mat_id, error) in enumerate(errors.items(), start=1):
         struct = df_cse[struct_col].loc[mat_id]
         if "structure" in struct:
             struct = struct["structure"]
diff --git a/scripts/model_figs/parity_energy_models.py b/scripts/model_figs/parity_energy_models.py
@@ -176,7 +176,7 @@
 y_title = fig.layout.yaxis.title.text
 
 # iterate over subplots and set new title
-for idx, anno in enumerate(fig.layout.annotations, 1):
+for idx, anno in enumerate(fig.layout.annotations, start=1):
     traces = [t for t in fig.data if t.xaxis == f"x{idx if idx > 1 else ''}"]
     # assert len(traces) in (0, 4), f"Plots must have 0 or 4 traces, got {len(traces)=}"
 
diff --git a/scripts/model_figs/per_element_errors.py b/scripts/model_figs/per_element_errors.py
@@ -66,7 +66,7 @@
 df_elem_err.index.name = "symbol"
 
 
-# %%
+# %% plot number of structures containing each element in MP and WBM
 for label, srs in (
     ("MP", df_elem_err[train_count_col]),
     ("WBM", df_frac_comp.where(pd.isna, 1).sum()),
diff --git a/scripts/model_figs/rolling_mae_vs_hull_dist_wbm_batches.py b/scripts/model_figs/rolling_mae_vs_hull_dist_wbm_batches.py
@@ -63,7 +63,7 @@
 assert len(markers) == 5  # number of iterations of element substitution in WBM data set
 model = Model.chgnet
 
-for idx, marker in enumerate(markers, 1):
+for idx, marker in enumerate(markers, start=1):
     # select all rows from WBM step=idx
     df_step = df_preds[df_preds.index.str.startswith(f"wbm-{idx}-")]
     df_each_step = df_each_pred[df_each_pred.index.str.startswith(f"wbm-{idx}-")]
diff --git a/scripts/project_compositions.py b/scripts/project_compositions.py
@@ -2,7 +2,7 @@
 
 # %%
 import os
-from datetime import datetime
+from datetime import UTC, datetime
 from typing import Any, Literal
 
 import numpy as np
@@ -39,7 +39,7 @@
 print(f"{data_path=}")
 print(f"{out_dim=}")
 print(f"{projection_type=}")
-start_time = datetime.now()
+start_time = datetime.now(tz=UTC)
 print(f"job started at {start_time:%Y-%m-%d %H:%M:%S}")
 df_in = pd.read_csv(data_path, na_filter=False).set_index(Key.mat_id)
 
@@ -92,7 +92,7 @@ def sum_one_hot_elem(formula: str) -> np.ndarray[Any, np.int64]:
 df_in[out_cols].to_csv(out_path)
 
 print(f"Wrote projections to {out_path!r}")
-end_time = datetime.now()
+end_time = datetime.now(tz=UTC)
 print(
     f"Job finished at {end_time:%Y-%m-%d %H:%M:%S} and took "
     f"{(end_time - start_time).seconds} sec"
diff --git a/scripts/update_wandb_runs.py b/scripts/update_wandb_runs.py
@@ -34,7 +34,7 @@
 updated_runs: list[Run] = []
 wet_run = input("Wet run or dry run? [w/d] ").lower().startswith("w")
 
-for idx, run in enumerate(runs, 1):
+for idx, run in enumerate(runs, start=1):
     old_config, new_config = run.config.copy(), run.config.copy()
 
     new_display_name = run.display_name.replace(
diff --git a/scripts/upload_to_figshare.py b/scripts/upload_to_figshare.py
@@ -29,7 +29,9 @@
 BASE_URL = "https://api.figshare.com/v2"
 
 
-def make_request(method: str, url: str, data: Any = None, binary: bool = False) -> Any:
+def make_request(
+    method: str, url: str, *, data: Any = None, binary: bool = False
+) -> Any:
     """Make a token-authorized HTTP request to the Figshare API."""
     headers = {"Authorization": f"token {TOKEN}"}
     if data is not None and not binary:
diff --git a/scripts/wbm_umap_projection.py b/scripts/wbm_umap_projection.py
@@ -40,6 +40,7 @@
 # %%
 def featurize_dataframe(
     df_in: pd.DataFrame | pd.Series,
+    *,
     struct_col: str = "structure",
     ignore_errors: bool = True,
     chunk_size: int = 30,

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -51,7 +51,7 @@` |
| 51 | 51 | `    )` |
| 52 | 52 | `    fig.suptitle(title, fontsize=20, fontweight="bold", y=1.05)` |
| 53 | 53 |  |
| 54 |  | `-    for idx, (mat_id, error) in enumerate(errors.items(), 1):` |
|  | 54 | `+    for idx, (mat_id, error) in enumerate(errors.items(), start=1):` |
| 55 | 55 | `        struct = df_cse[struct_col].loc[mat_id]` |
| 56 | 56 | `        if "structure" in struct:` |
| 57 | 57 | `            struct = struct["structure"]` |