fix trainable params in test_m3gnet.py

janosh · janosh · commit e203f8f7ab00 · 2023-12-28T11:29:46.000+01:00
update site to sveltekit v2, vite v5
ruff unignore NPY002 and fix violations
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -7,7 +7,7 @@ default_install_hook_types: [pre-commit, commit-msg]
 
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.7
+    rev: v0.1.9
     hooks:
       - id: ruff
         args: [--fix]
@@ -30,7 +30,7 @@ repos:
       - id: trailing-whitespace
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.7.1
+    rev: v1.8.0
     hooks:
       - id: mypy
         additional_dependencies: [types-pyyaml, types-requests]
@@ -45,7 +45,7 @@ repos:
         args: [--ignore-words-list, "nd,te,fpr", --check-filenames]
 
   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v4.0.0-alpha.3
+    rev: v4.0.0-alpha.8
     hooks:
       - id: prettier
         args: [--write] # edit files in-place
@@ -56,7 +56,7 @@ repos:
         exclude: ^(site/src/figs/.+\.svelte|data/wbm/20.+\..+|site/src/routes/.+\.(yaml|json)|changelog.md)$
 
   - repo: https://github.com/pre-commit/mirrors-eslint
-    rev: v8.55.0
+    rev: v8.56.0
     hooks:
       - id: eslint
         types: [file]
diff --git a/matbench_discovery/structure.py b/matbench_discovery/structure.py
@@ -10,7 +10,7 @@
 __author__ = "Janosh Riebesell"
 __date__ = "2022-12-02"
 
-np.random.seed(0)  # ensure reproducible structure perturbations
+rng = np.random.default_rng(0)  # ensure reproducible structure perturbations
 
 
 def perturb_structure(struct: Structure, gamma: float = 1.5) -> Structure:
@@ -29,8 +29,8 @@ def perturb_structure(struct: Structure, gamma: float = 1.5) -> Structure:
     """
     perturbed = struct.copy()
     for site in perturbed:
-        magnitude = np.random.weibull(gamma)
-        vec = np.random.randn(3)  # TODO maybe make func recursive to deal with 0-vector
+        magnitude = rng.weibull(gamma)
+        vec = rng.normal(size=3)  # TODO maybe make func recursive to deal with 0-vector
         vec /= np.linalg.norm(vec)  # unit vector
         site.coords += vec * magnitude
         site.to_unit_cell(in_place=True)
@@ -42,7 +42,7 @@ def perturb_structure(struct: Structure, gamma: float = 1.5) -> Structure:
     import matplotlib.pyplot as plt
 
     gamma = 1.5
-    samples = np.array([np.random.weibull(gamma) for _ in range(10000)])
+    samples = np.array([rng.weibull(gamma) for _ in range(10_000)])
     mean = samples.mean()
 
     # reproduces the dist in https://www.nature.com/articles/s41524-022-00891-8#Fig5
diff --git a/models/cgcnn/plot_structure_perturbation.py b/models/cgcnn/plot_structure_perturbation.py
@@ -10,9 +10,11 @@
 __author__ = "Janosh Riebesell"
 __date__ = "2022-12-02"
 
+rng = np.random.default_rng(0)
+
 
 # %%
-ax = pd.Series(np.random.weibull(1.5, 100000)).hist(bins=100)
+ax = pd.Series(rng.weibull(1.5, 100_000)).hist(bins=100)
 title = "Distribution of perturbation magnitudes"
 ax.set(xlabel="magnitude of perturbation", ylabel="count", title=title)
 
diff --git a/models/chgnet/join_chgnet_results.py b/models/chgnet/join_chgnet_results.py
@@ -30,7 +30,7 @@
 # %%
 module_dir = os.path.dirname(__file__)
 task_type = "IS2RE"
-date = "2023-10-23"
+date = "2023-12-21"
 glob_pattern = f"{date}-chgnet-*-wbm-{task_type}*/*.json.gz"
 file_paths = sorted(glob(f"{module_dir}/{glob_pattern}"))
 print(f"Found {len(file_paths):,} files for {glob_pattern = }")
diff --git a/models/m3gnet/test_m3gnet.py b/models/m3gnet/test_m3gnet.py
@@ -32,18 +32,17 @@
 # direct: DIRECT cluster sampling, ms: manual sampling
 model_type: Literal["orig", "direct", "manual-sampling"] = "orig"
 # set large job array size for smaller data splits and faster testing/debugging
-slurm_array_task_count = 100
+slurm_array_task_count = 50
 job_name = f"m3gnet-{model_type}-wbm-{task_type}"
 out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
 
 slurm_vars = slurm_submit(
     job_name=job_name,
     out_dir=out_dir,
-    partition="icelake-himem",
-    account="LEE-SL3-CPU",
-    time="3:0:0",
+    account="matgen",
+    time="11:55:0",
     array=f"1-{slurm_array_task_count}",
-    slurm_flags=("--mem", "12G"),
+    slurm_flags="--qos shared --constraint cpu --mem 16G",
     # TF_CPP_MIN_LOG_LEVEL=2 means INFO and WARNING logs are not printed
     # https://stackoverflow.com/a/40982782
     pre_cmd="TF_CPP_MIN_LOG_LEVEL=2",
@@ -88,7 +87,13 @@
     task_type=task_type,
     df=dict(shape=str(df_in.shape), columns=", ".join(df_in)),
     slurm_vars=slurm_vars,
-    trainable_params=sum(param.numel() for param in m3gnet.parameters()),
+    trainable_params=sum(
+        [np.prod(weight.shape) for weight in m3gnet.potential.model.trainable_weights]
+    ),
+    checkpoint=checkpoint,
+    model_type=model_type,
+    out_path=out_path,
+    job_name=job_name,
 )
 
 run_name = f"{job_name}-{slurm_array_task_id}"
@@ -103,7 +108,7 @@
 
 structures = df_in[input_col].map(Structure.from_dict).to_dict()
 
-for material_id in tqdm(structures, desc="Relaxing", disable=None):
+for material_id in tqdm(structures, desc="Relaxing"):
     if material_id in relax_results:
         continue
     try:
diff --git a/models/mace/join_mace_results.py b/models/mace/join_mace_results.py
@@ -29,7 +29,6 @@
 module_dir = os.path.dirname(__file__)
 task_type = "IS2RE"
 e_form_mace_col = "e_form_per_atom_mace"
-
 date = "2023-12-11"
 glob_pattern = f"{date}-mace-wbm-{task_type}*/*.json.gz"
 file_paths = sorted(glob(f"{module_dir}/{glob_pattern}"))
@@ -92,16 +91,15 @@
 
 
 # %%
-bad_mask = (df_wbm[e_form_mace_col] - df_wbm[e_form_col]) < -3
-df_wbm[bad_mask].to_csv(f"{module_dir}/mace-underpredictions<-3.csv")
+bad_mask = (df_wbm[e_form_mace_col] - df_wbm[e_form_col]) < -5
 print(f"{sum(bad_mask)=}")
 ax = density_scatter(df=df_wbm[~bad_mask], x=e_form_col, y=e_form_mace_col)
 
 
 # %%
 out_path = file_paths[0].rsplit("/", 1)[0]
 df_mace = df_mace.round(4)
-df_mace[~bad_mask].select_dtypes("number").to_csv(f"{out_path}.csv.gz")
+df_mace.select_dtypes("number").to_csv(f"{out_path}.csv.gz")
 df_mace.reset_index().to_json(f"{out_path}.json.gz", default_handler=as_dict_handler)
 
 df_bad = df_mace[bad_mask].drop(columns=[entry_col, struct_col])
diff --git a/models/mace/test_mace.py b/models/mace/test_mace.py
@@ -3,13 +3,13 @@
 
 import os
 from importlib.metadata import version
-from typing import Any
+from typing import Any, Literal
 
 import numpy as np
 import pandas as pd
 import torch
 import wandb
-from ase.filters import FrechetCellFilter
+from ase.filters import ExpCellFilter, FrechetCellFilter
 from ase.optimize import FIRE, LBFGS
 from mace.calculators import mace_mp
 from mace.tools import count_parameters
@@ -31,7 +31,7 @@
 task_type = "IS2RE"  # "RS2RE"
 module_dir = os.path.dirname(__file__)
 # set large job array size for smaller data splits and faster testing/debugging
-slurm_array_task_count = 20
+slurm_array_task_count = 50
 ase_optimizer = "FIRE"
 job_name = f"mace-wbm-{task_type}-{ase_optimizer}"
 out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
@@ -42,15 +42,16 @@
     "2023-10-29-mace-16M-pbenner-mptrj-no-conditional-loss",
     "https://tinyurl.com/y7uhwpje",
 ][-1]
+ase_filter: Literal["frechet", "exp"] = "frechet"
 
 slurm_vars = slurm_submit(
     job_name=job_name,
     out_dir=out_dir,
     account="matgen",
-    time="9:55:0",
+    time="11:55:0",
     array=f"1-{slurm_array_task_count}",
-    slurm_flags="--qos shared --constraint gpu --gpus 1",
-    # slurm_flags="--qos shared --constraint cpu --mem 16G",
+    # slurm_flags="--qos shared --constraint gpu --gpus 1",
+    slurm_flags="--qos shared --constraint cpu --mem 32G",
 )
 
 
@@ -98,6 +99,7 @@
     trainable_params=count_parameters(mace_calc.models[0]),
     model_name=model_name,
     dtype=dtype,
+    ase_filter=ase_filter,
 )
 
 run_name = f"{job_name}-{slurm_array_task_id}"
@@ -112,6 +114,7 @@
     df_in[input_col] = [x["structure"] for x in df_in.computed_structure_entry]
 
 structs = df_in[input_col].map(Structure.from_dict).to_dict()
+filter_cls = {"frechet": FrechetCellFilter, "exp": ExpCellFilter}[ase_filter]
 
 for material_id in tqdm(structs, desc="Relaxing"):
     if material_id in relax_results:
@@ -121,7 +124,7 @@
         atoms = structs[material_id].to_ase_atoms()
         atoms.calc = mace_calc
         if max_steps > 0:
-            atoms = FrechetCellFilter(atoms)
+            atoms = filter_cls(atoms)
             optim_cls = {"FIRE": FIRE, "LBFGS": LBFGS}[ase_optimizer]
             optimizer = optim_cls(atoms, logfile="/dev/null")
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -98,7 +98,6 @@ ignore = [
   "FIX002",
   "INP001",
   "N806",    # non-lowercase-variable-in-function
-  "NPY002",
   "PD901",   # pandas-df-variable-name
   "PERF203", # try-except-in-loop
   "PLC0414", # useless-import-alias
@@ -119,6 +118,7 @@ ignore = [
 ]
 pydocstyle.convention = "google"
 isort.known-third-party = ["wandb"]
+isort.split-on-trailing-comma = false
 
 [tool.ruff.per-file-ignores]
 "tests/*" = ["D", "S101"]
diff --git a/scripts/model_figs/make_hull_dist_box_plot.py b/scripts/model_figs/make_hull_dist_box_plot.py
@@ -20,7 +20,7 @@
     # different fill colors for each box
     # patch_artist=True,
     # notch=True,
-    # bootstrap=10000,
+    # bootstrap=10_000,
     showmeans=True,
     # meanline=True,
 )
diff --git a/scripts/model_figs/model_run_times.py b/scripts/model_figs/model_run_times.py
@@ -155,7 +155,7 @@
 ).update_traces(
     textinfo="percent+label",
     textfont_size=14,
-    marker=dict(line=dict(color="#000000", width=2)),
+    marker=dict(line=dict(color="black", width=2)),
     hoverinfo="label+percent+name",
     texttemplate="%{label}<br>%{percent:.1%}",
     hovertemplate="%{label} %{percent:.1%} (%{value:.1f} h)",
diff --git a/site/package.json b/site/package.json
@@ -17,38 +17,38 @@
     "changelog": "npx auto-changelog --output ../changelog.md --hide-credit --commit-limit false --latest-version x.y.z"
   },
   "devDependencies": {
-    "@iconify/svelte": "^3.1.4",
+    "@iconify/svelte": "^3.1.6",
     "@rollup/plugin-yaml": "^4.1.2",
-    "@sveltejs/adapter-static": "^2.0.3",
-    "@sveltejs/kit": "^1.27.4",
-    "@sveltejs/vite-plugin-svelte": "^2.5.1",
-    "@typescript-eslint/eslint-plugin": "^6.10.0",
-    "@typescript-eslint/parser": "^6.10.0",
+    "@sveltejs/adapter-static": "^3.0.1",
+    "@sveltejs/kit": "^2.0.6",
+    "@sveltejs/vite-plugin-svelte": "^3.0.1",
+    "@typescript-eslint/eslint-plugin": "^6.16.0",
+    "@typescript-eslint/parser": "^6.16.0",
     "d3-scale-chromatic": "^3.0.0",
     "elementari": "^0.2.2",
-    "eslint": "^8.53.0",
-    "eslint-plugin-svelte": "^2.35.0",
+    "eslint": "^8.56.0",
+    "eslint-plugin-svelte": "^2.35.1",
     "hastscript": "^8.0.0",
     "highlight.js": "^11.9.0",
     "js-yaml": "^4.1.0",
     "katex": "^0.16.9",
     "mdsvex": "^0.11.0",
-    "prettier": "^3.0.3",
-    "prettier-plugin-svelte": "^3.0.3",
+    "prettier": "^3.1.1",
+    "prettier-plugin-svelte": "^3.1.2",
     "rehype-autolink-headings": "^7.1.0",
     "rehype-katex-svelte": "^1.2.0",
     "rehype-slug": "^6.0.0",
     "remark-math": "3.0.0",
-    "svelte": "^4.2.2",
-    "svelte-check": "^3.5.2",
+    "svelte": "^4.2.8",
+    "svelte-check": "^3.6.2",
     "svelte-multiselect": "^10.2.0",
-    "svelte-preprocess": "^5.0.4",
+    "svelte-preprocess": "^5.1.3",
     "svelte-toc": "^0.5.6",
     "svelte-zoo": "^0.4.9",
-    "svelte2tsx": "^0.6.23",
+    "svelte2tsx": "^0.6.27",
     "tslib": "^2.6.2",
-    "typescript": "5.2.2",
-    "vite": "^4.5.0"
+    "typescript": "5.3.3",
+    "vite": "^5.0.10"
   },
   "prettier": {
     "semi": false,
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
@@ -55,8 +55,8 @@ def test_stable_metrics() -> None:
 
     # test stable_metrics gives the same result as sklearn.metrics.classification_report
     # for random numpy data
-    np.random.seed(0)
-    y_true, y_pred = np.random.randn(100, 2).T
+    rng = np.random.default_rng(0)
+    y_true, y_pred = rng.normal(size=(2, 100))
     metrics = stable_metrics(y_true, y_pred)
 
     from sklearn.metrics import classification_report
diff --git a/tests/test_structure.py b/tests/test_structure.py
@@ -2,25 +2,19 @@
 
 from typing import TYPE_CHECKING
 
-import numpy as np
-
 from matbench_discovery.structure import perturb_structure
 
 if TYPE_CHECKING:
     from pymatgen.core import Structure
 
 
 def test_perturb_structure(dummy_struct: Structure) -> None:
-    np.random.seed(0)
     perturbed = perturb_structure(dummy_struct)
     assert len(perturbed) == len(dummy_struct)
 
     for site, new in zip(dummy_struct, perturbed):
         assert site.specie == new.specie
         assert tuple(site.coords) != tuple(new.coords)
 
-    # test that the perturbation is reproducible
-    np.random.seed(0)
-    assert perturbed == perturb_structure(dummy_struct)
     # but different on subsequent calls
     assert perturb_structure(dummy_struct) != perturb_structure(dummy_struct)

Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,7 @@`
`20`	`20`	`# different fill colors for each box`
`21`	`21`	`# patch_artist=True,`
`22`	`22`	`# notch=True,`
`23`		`- # bootstrap=10000,`
	`23`	`+ # bootstrap=10_000,`
`24`	`24`	`showmeans=True,`
`25`	`25`	`# meanline=True,`
`26`	`26`	`)`