fix email typo (gmail.gov -> gmail.com) and correct the MP DB version (2021.05.13 -> 2022.10.28) cited for convex hull construction in the list of WBM processing steps

janosh · janosh · commit 1cc9fe614f43 · 2024-02-02T19:14:27.000+01:00
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -7,7 +7,7 @@ default_install_hook_types: [pre-commit, commit-msg]
 
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.15
+    rev: v0.2.0
     hooks:
       - id: ruff
         args: [--fix]
diff --git a/citation.cff b/citation.cff
@@ -6,7 +6,7 @@ authors:
   - given-names: Janosh
     family-names: Riebesell
     affiliation: University of Cambridge, Lawrence Berkeley National Laboratory
-    email: janosh.riebesell@gmail.gov
+    email: janosh.riebesell@gmail.com
     orcid: https://orcid.org/0000-0001-5233-3462
     corresponding: true
     affil_key: 1, 2
diff --git a/data/mp/eda_mp_trj.py b/data/mp/eda_mp_trj.py
@@ -477,8 +477,25 @@ def tile_count_anno(hist_vals: list[Any]) -> dict[str, Any]:
 )
 fig.layout.xaxis3 = dict(overlaying="x2", visible=False)
 
+# dotted guide lines at the 90% cumulative mark: one dropping to the x axis, one running right to max n_sites
+x_90 = df_n_sites[Key.n_sites][
+    (df_n_sites[n_struct_col].cumsum() / df_n_sites[n_struct_col].sum()) < 0.9
+].iloc[-1]
+for x0, y0, x1, y1 in (
+    (x_90, 0, x_90, 0.9),
+    (x_90, 0.9, df_n_sites[Key.n_sites].max(), 0.9),
+):
+    fig.add_shape(
+        type="line",
+        **dict(x0=x0, y0=y0, x1=x1, y1=y1),
+        line=dict(width=1, dash="dot"),
+        xref="x3",
+        yref="y3",
+    )
+fig.layout.yaxis3.update(showgrid=False, rangemode="tozero")
+
 fig.layout.margin = dict(l=5, r=5, b=5, t=5)
-fig.layout.legend.update(x=0.96, y=0.25, xanchor="right")
+fig.layout.legend.update(x=0.96, y=0.18, xanchor="right", bgcolor="rgba(0,0,0,0)")
 fig.show()
 
 img_name = "mp-trj-n-sites-hist"
diff --git a/data/wbm/compile_wbm_test_set.py b/data/wbm/compile_wbm_test_set.py
@@ -314,8 +314,8 @@ def increment_wbm_material_id(wbm_id: str) -> str:
     df_summary_bz2.reset_index(drop=True).query(query_str),
 )
 
-
-assert sum(no_id_mask := df_summary.index.isna()) == 6, f"{sum(no_id_mask)=}"
+no_id_mask = df_summary.index.isna()
+assert sum(no_id_mask) == 6, f"{sum(no_id_mask)=}"
 # the 'None' materials have 0 volume, energy, n_sites, bandgap, etc.
 assert all(df_summary[no_id_mask].drop(columns=[Key.formula]) == 0)
 assert len(df_summary.query("volume > 0")) == len(df_wbm) + len(nan_init_structs_ids)
diff --git a/data/wbm/readme.md b/data/wbm/readme.md
@@ -29,7 +29,7 @@ The full set of processing steps used to curate the WBM test set from the raw da
   </slot>
 
 - apply the [`MaterialsProject2020Compatibility`](https://github.com/materialsproject/pymatgen/blob/02a4ca8aa0277b5f6db11f4de4fdbba129de70a5/pymatgen/entries/compatibility.py#L823) energy correction scheme to the formation energies
-- compute energy to the Materials Project convex hull constructed from all MP `ComputedStructureEntries` queried on 2023-02-07 ([database release 2021.05.13](https://docs.materialsproject.org/changes/database-versions#v2021.05.13))
+- compute energy to the Materials Project convex hull constructed from all MP `ComputedStructureEntries` queried on 2023-02-07 ([database release 2022.10.28](https://docs.materialsproject.org/changes/database-versions#v2022.10.28))
 
 Invoking the script `python compile_wbm_test_set.py` will auto-download and regenerate the WBM test set files from scratch. If you find
 
diff --git a/models/chgnet/metadata.yml b/models/chgnet/metadata.yml
@@ -15,7 +15,7 @@ authors:
     orcid: https://orcid.org/0000-0003-1974-028X
   - name: Janosh Riebesell
     affiliation: University of Cambridge, Lawrence Berkeley National Laboratory
-    email: janosh.riebesell@gmail.gov
+    email: janosh.riebesell@gmail.com
     orcid: https://orcid.org/0000-0001-5233-3462
   - name: Kevin Han
     affiliation: UC Berkeley
diff --git a/models/voronoi/train_test_voronoi_rf.py b/models/voronoi/train_test_voronoi_rf.py
@@ -19,7 +19,8 @@
 from matbench_discovery.slurm import slurm_submit
 
 sys.path.append(f"{ROOT}/models")
-from voronoi import featurizer  # noqa: E402
+
+from voronoi import featurizer
 
 __author__ = "Janosh Riebesell"
 __date__ = "2022-11-26"
diff --git a/models/voronoi/voronoi_featurize_dataset.py b/models/voronoi/voronoi_featurize_dataset.py
@@ -19,7 +19,8 @@
 from matbench_discovery.slurm import slurm_submit
 
 sys.path.append(f"{ROOT}/models")
-from voronoi import featurizer  # noqa: E402
+
+from voronoi import featurizer
 
 __author__ = "Janosh Riebesell"
 __date__ = "2022-10-31"
diff --git a/models/wrenformer/metadata.yml b/models/wrenformer/metadata.yml
@@ -6,7 +6,7 @@ date_published: "2021-06-21"
 authors:
   - name: Janosh Riebesell
     affiliation: University of Cambridge, Lawrence Berkeley National Laboratory
-    email: janosh.riebesell@gmail.gov
+    email: janosh.riebesell@gmail.com
     orcid: https://orcid.org/0000-0001-5233-3462
   - name: Rhys Goodall
     affiliation: University of Cambridge
diff --git a/pyproject.toml b/pyproject.toml
@@ -77,8 +77,8 @@ universal = true
 
 [tool.ruff]
 target-version = "py39"
-select = ["ALL"]
-ignore = [
+lint.select = ["ALL"]
+lint.ignore = [
   "ANN101",
   "ANN401",
   "BLE001",
@@ -113,11 +113,11 @@ ignore = [
   "TRY003",
   "TRY301",
 ]
-pydocstyle.convention = "google"
-isort.known-third-party = ["wandb"]
-isort.split-on-trailing-comma = false
+lint.pydocstyle.convention = "google"
+lint.isort.known-third-party = ["wandb"]
+lint.isort.split-on-trailing-comma = false
 
-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
 "tests/*" = ["D", "S101"]
 "matbench_discovery/plots.py" = ["ERA001"] # allow commented out code
 "matbench_discovery/preds.py" = ["ERA001"] # allow commented out code
diff --git a/readme.md b/readme.md
@@ -27,6 +27,6 @@ Our results show that ML models have become robust enough to deploy them as tria
 
 We welcome contributions that add new models to the leaderboard through GitHub PRs. See the [contributing guide](https://janosh.github.io/matbench-discovery/contribute) for details.
 
-If you're interested in joining this work, feel free to [open a GitHub discussion](https://github.com/janosh/matbench-discovery/discussions) or [send an email](mailto:janosh.riebesell@gmail.gov?subject=Collaborate%20on%20Matbench%20Discovery).
+If you're interested in joining this work, feel free to [open a GitHub discussion](https://github.com/janosh/matbench-discovery/discussions) or [send an email](mailto:janosh.riebesell@gmail.com?subject=Collaborate%20on%20Matbench%20Discovery).
 
 For detailed results and analysis, check out the [preprint](https://janosh.github.io/matbench-discovery/preprint).
diff --git a/site/package.json b/site/package.json
@@ -1,7 +1,7 @@
 {
   "name": "matbench-discovery",
   "description": "Benchmarking machine learning energy models for materials discovery.",
-  "author": "Janosh Riebesell <janosh.riebesell@gmail.gov>",
+  "author": "Janosh Riebesell <janosh.riebesell@gmail.com>",
   "homepage": "https://janosh.github.io/matbench-discovery",
   "repository": "https://github.com/janosh/matbench-discovery",
   "package": "https://pypi.org/project/matbench-discovery",
diff --git a/site/src/figs/mp-trj-n-sites-hist.svelte b/site/src/figs/mp-trj-n-sites-hist.svelte
diff --git a/site/src/lib/Footer.svelte b/site/src/lib/Footer.svelte
@@ -25,7 +25,7 @@
 <footer>
   <nav>
     <a href="{repository}/issues">Issues</a>
-    <a href="mailto:janosh.riebesell@gmail.gov?subject=Matbench Discovery">Contact</a>
+    <a href="mailto:janosh.riebesell@gmail.com?subject=Matbench Discovery">Contact</a>
     <a href="/changelog">Changelog</a>
     <button
       on:click={() => (show_tips = true)}
diff --git a/site/src/routes/preprint/iclr-ml4mat/+page.md b/site/src/routes/preprint/iclr-ml4mat/+page.md
@@ -66,13 +66,15 @@ In contrast, we believe the discovery of stable materials is a problem where ML
 
 ## Data Sets
 
-The choice of data for the train and test sets of this benchmark fell on the latest Materials Project (MP) @jain_commentary_2013 database release (2021.05.13 at time of writing) and the WBM dataset @wang_predicting_2021.
+The choice of data for the train and test sets of this benchmark fell on the latest Materials Project (MP) @jain_commentary_2013 database release ([2022.10.28] at time of writing) and the WBM dataset @wang_predicting_2021.
+
+[2022.10.28]: https://docs.materialsproject.org/changes/database-versions#v2022.10.28
 
 ### The Materials Project - Training Set
 
 The Materials Project is a well-known effort to calculate the properties of all inorganic materials using high-throughput ab-initio methods.
 At the time of access, the Materials Project database contains approximately 154k crystals (providing relaxed+initial structure and the relaxation trajectory for each of them) covering a diverse range of chemistries.
-For our benchmark, the training set is all data available from the 2021.05.13 MP release. Models are free to train on relaxed and/or unrelaxed structures or the full DFT relaxation trajectory. This flexibility is intended to allow authors to experiment and exploit the large variety of data available.
+For our benchmark, the training set is all data available from the [2022.10.28] MP release. Models are free to train on relaxed and/or unrelaxed structures or the full DFT relaxation trajectory. This flexibility is intended to allow authors to experiment and exploit the large variety of data available.
 
 ### WBM - Test Set
 
diff --git a/site/tsconfig.json b/site/tsconfig.json
@@ -12,6 +12,6 @@
 
     "forceConsistentCasingInFileNames": true,
     "resolveJsonModule": true,
-    "allowSyntheticDefaultImports": true,
-  },
+    "allowSyntheticDefaultImports": true
+  }
 }

Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,7 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "matbench-discovery",`
`3`	`3`	`"description": "Benchmarking machine learning energy models for materials discovery.",`
`4`		`- "author": "Janosh Riebesell <janosh.riebesell@gmail.gov>",`
	`4`	`+ "author": "Janosh Riebesell <janosh.riebesell@gmail.com>",`
`5`	`5`	`"homepage": "https://janosh.github.io/matbench-discovery",`
`6`	`6`	`"repository": "https://github.com/janosh/matbench-discovery",`
`7`	`7`	`"package": "https://pypi.org/project/matbench-discovery",`
Original file line number	Diff line number	Diff line change
`@@ -12,6 +12,6 @@`
`12`	`12`
`13`	`13`	`"forceConsistentCasingInFileNames": true,`
`14`	`14`	`"resolveJsonModule": true,`
`15`		`- "allowSyntheticDefaultImports": true,`
`16`		`- },`
	`15`	`+ "allowSyntheticDefaultImports": true`
	`16`	`+ }`
`17`	`17`	`}`