make wide tables horizontally scrollable on mobile screens

janosh · janosh · commit fc20ccf5f33d · 2023-06-19T20:30:44.000-07:00
add flex-wrap: wrap; to side-by-side figures to get line breaks on mobile

add explanation for CHGNet/M3GNet difference in caption of fig:cumulative-mae-rmse
diff --git a/matbench_discovery/plots.py b/matbench_discovery/plots.py
@@ -70,6 +70,7 @@ def unit(text: str) -> str:
     cgcnn="CGCNN",
     m3gnet_megnet="M3GNet + MEGNet",
     m3gnet="M3GNet",
+    m3gnet_directs="M3GNet DIRECTS",
     megnet="MEGNet",
     voronoi_rf="Voronoi RF",
     wrenformer="Wrenformer",
@@ -841,7 +842,7 @@ def df_to_svelte_table(
     styler: Styler,
     file_path: str | Path,
     inline_props: str = "",
-    styles: str = "",
+    styles: str = "table { overflow: scroll; max-width: 100%; display: block; }",
     **kwargs: Any,
 ) -> None:
     """Convert a pandas Styler to a svelte table.
diff --git a/matbench_discovery/preds.py b/matbench_discovery/preds.py
@@ -44,6 +44,7 @@ class PredFiles(Files):
 
     # original M3GNet straight from publication, not re-trained
     m3gnet = "m3gnet/2022-10-31-m3gnet-wbm-IS2RE.csv"
+    # m3gnet_directs = "m3gnet/2023-05-30-m3gnet-directs-wbm-IS2RE.csv"
 
     # original MEGNet straight from publication, not re-trained
     megnet = "megnet/2022-11-18-megnet-wbm-IS2RE/megnet-e-form-preds.csv"
diff --git a/models/bowsr/join_bowsr_results.py b/models/bowsr/join_bowsr_results.py
@@ -66,11 +66,14 @@
 
 
 # %%
-out_path = f"{module_dir}/{today}-bowsr-megnet-wbm-{task_type}.json.gz"
-df_bowsr.reset_index().to_json(out_path, default_handler=lambda x: x.as_dict())
+out_path = f"{module_dir}/{today}-bowsr-megnet-wbm-{task_type}"
+df_bowsr = df_bowsr.round(4)
+# save energy and formation energy as fast-loading CSV
+df_bowsr.select_dtypes("number").to_csv(f"{out_path}.csv")
+df_bowsr.reset_index().to_json(
+    f"{out_path}.json.gz", default_handler=lambda x: x.as_dict()
+)
 
-# save energy and formation energy as CSV for fast loading
-df_bowsr.select_dtypes("number").to_csv(out_path.replace(".json.gz", ".csv"))
 
 # in_path = f"{module_dir}/2023-01-23-bowsr-megnet-wbm-IS2RE.json.gz"
 # df_bowsr = pd.read_json(in_path).set_index("material_id")
diff --git a/models/chgnet/join_chgnet_results.py b/models/chgnet/join_chgnet_results.py
@@ -64,10 +64,10 @@
 
 
 # %%
-out_path = f"{module_dir}/{today}-chgnet-wbm-{task_type}.json.gz"
+out_path = f"{module_dir}/{today}-chgnet-wbm-{task_type}"
 df_chgnet = df_chgnet.round(4)
-df_chgnet.select_dtypes("number").to_csv(out_path.replace(".json.gz", ".csv"))
-df_chgnet.reset_index().to_json(out_path, default_handler=as_dict_handler)
+df_chgnet.select_dtypes("number").to_csv(f"{out_path}.csv")
+df_chgnet.reset_index().to_json(f"{out_path}.json.gz", default_handler=as_dict_handler)
 
 # in_path = f"{module_dir}/2023-03-04-chgnet-wbm-IS2RE.json.gz"
 # df_chgnet = pd.read_csv(in_path.replace(".json.gz", ".csv")).set_index("material_id")
diff --git a/models/m3gnet/join_m3gnet_results.py b/models/m3gnet/join_m3gnet_results.py
@@ -30,13 +30,16 @@
 # %%
 module_dir = os.path.dirname(__file__)
 task_type = "IS2RE"
-date = "2022-10-31"
-glob_pattern = f"{date}-m3gnet-wbm-{task_type}/*.json.gz"
+date = "2023-05-30"
+model_type = "directs"
+glob_pattern = f"{date}-m3gnet-{model_type}-wbm-{task_type}/*.json.gz"
 file_paths = sorted(glob(f"{module_dir}/{glob_pattern}"))
 struct_col = "m3gnet_structure"
 print(f"Found {len(file_paths):,} files for {glob_pattern = }")
 
-dfs: dict[str, pd.DataFrame] = {}
+# prevent accidental overwrites
+if "dfs" not in locals():
+    dfs: dict[str, pd.DataFrame] = {}
 
 
 # %%
@@ -66,7 +69,7 @@
 for row in tqdm(df_m3gnet.itertuples(), total=len(df_m3gnet)):
     mat_id, struct_dict, m3gnet_energy, *_ = row
     m3gnet_struct = Structure.from_dict(struct_dict)
-    df_m3gnet.loc[mat_id, struct_col] = m3gnet_struct
+    df_m3gnet.at[mat_id, struct_col] = m3gnet_struct  # noqa: PD008
     cse = df_cse.loc[mat_id, "cse"]
     cse._energy = m3gnet_energy  # cse._energy is the uncorrected energy
     cse._structure = m3gnet_struct
@@ -81,7 +84,7 @@
 
 
 # %% compute corrected formation energies
-df_m3gnet["e_form_per_atom_m3gnet"] = [
+df_m3gnet[f"e_form_per_atom_m3gnet_{model_type}"] = [
     get_e_form_per_atom(cse) for cse in tqdm(df_m3gnet.cse)
 ]
 
@@ -93,11 +96,11 @@
 
 
 # %%
-out_path = f"{module_dir}/{today}-m3gnet-wbm-{task_type}.json.gz"
+out_path = f"{module_dir}/{today}-m3gnet-{model_type}-wbm-{task_type}"
 df_m3gnet = df_m3gnet.round(4)
-df_m3gnet.reset_index().to_json(out_path, default_handler=as_dict_handler)
+df_m3gnet.select_dtypes("number").to_csv(f"{out_path}.csv")
+df_m3gnet.reset_index().to_json(f"{out_path}.json.gz", default_handler=as_dict_handler)
 
-df_m3gnet.select_dtypes("number").to_csv(out_path.replace(".json.gz", ".csv"))
 
 # in_path = f"{module_dir}/2022-10-31-m3gnet-wbm-IS2RE.json.gz"
 # df_m3gnet = pd.read_csv(in_path.replace(".json.gz", ".csv")).set_index("material_id")
diff --git a/models/m3gnet/test_m3gnet.py b/models/m3gnet/test_m3gnet.py
@@ -11,7 +11,7 @@
 import os
 import warnings
 from importlib.metadata import version
-from typing import Any
+from typing import Any, Literal
 
 import numpy as np
 import pandas as pd
@@ -20,7 +20,7 @@
 from pymatgen.core import Structure
 from tqdm import tqdm
 
-from matbench_discovery import DEBUG, timestamp, today
+from matbench_discovery import DEBUG, ROOT, timestamp, today
 from matbench_discovery.data import DATA_FILES, as_dict_handler
 from matbench_discovery.slurm import slurm_submit
 
@@ -29,9 +29,10 @@
 
 task_type = "IS2RE"  # "RS2RE"
 module_dir = os.path.dirname(__file__)
+model_type: Literal["orig", "direct", "manual-sampling"] = "manual-sampling"
 # set large job array size for smaller data splits and faster testing/debugging
 slurm_array_task_count = 100
-job_name = f"m3gnet-wbm-{task_type}{'-debug' if DEBUG else ''}"
+job_name = f"m3gnet-{model_type}-wbm-{task_type}{'-debug' if DEBUG else ''}"
 out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
 
 slurm_vars = slurm_submit(
@@ -85,7 +86,12 @@
 
 
 # %%
-megnet = Relaxer()  # load default pre-trained M3GNet model
+checkpoint = None
+if model_type == "direct":
+    checkpoint = f"{ROOT}/models/m3gnet/2023-05-26-DI-DFTstrictF10-TTRS-128U-442E"
+if model_type == "manual-sampling":
+    checkpoint = f"{ROOT}/models/m3gnet/2023-05-26-MS-DFTstrictF10-128U-154E"
+megnet = Relaxer(potential=checkpoint)  # load pre-trained M3GNet model
 relax_results: dict[str, dict[str, Any]] = {}
 input_col = {"IS2RE": "initial_structure", "RS2RE": "relaxed_structure"}[task_type]
 
diff --git a/scripts/cumulative_metrics.py b/scripts/cumulative_metrics.py
@@ -27,14 +27,14 @@
 
 
 # %%
-# metrics = ("Precision", "Recall")
-metrics = ("MAE", "RMSE")
+metrics = ("Precision", "Recall")
+# metrics = ("MAE", "RMSE")
 fig, df_metric = cumulative_metrics(
     e_above_hull_true=df_preds[each_true_col],
     df_preds=df_each_pred[models],
     project_end_point="xy",
     backend=(backend := "plotly"),
-    range_y=(0, 0.4),
+    range_y=(0, 1),
     metrics=metrics,
     # facet_col_wrap=2,
     # increase facet col gap
diff --git a/site/src/app.css b/site/src/app.css
@@ -122,8 +122,10 @@ img {
 }
 
 table {
+  display: block;
+  max-width: 100%;
+  overflow: scroll;
   border-collapse: collapse;
-  width: 100%;
 }
 table :is(td, th) {
   border: 1px solid gray;
diff --git a/site/src/routes/+layout.svelte b/site/src/routes/+layout.svelte
@@ -46,16 +46,16 @@
       document.documentElement.style.setProperty(`--main-max-width`, `50em`)
     }
 
-    for (const node of document.querySelectorAll('pre > code')) {
+    for (const node of document.querySelectorAll(`pre > code`)) {
       // skip if <pre> already contains a button (presumably for copy)
       const pre = node.parentElement
       if (!pre || pre.querySelector(`button`)) continue
 
       new CopyButton({
         target: pre,
         props: {
-          content: node.textContent ?? '',
-          style: 'position: absolute; top: 1ex; right: 1ex;',
+          content: node.textContent ?? ``,
+          style: `position: absolute; top: 1ex; right: 1ex;`,
         },
       })
     }
diff --git a/site/src/routes/about-the-data/+page.svelte b/site/src/routes/about-the-data/+page.svelte
@@ -107,7 +107,7 @@
     {/if}
   </svelte:fragment>
   <div
-    style="display: flex; gap: 1em; justify-content: space-around;"
+    style="display: flex; gap: 1em; justify-content: space-around; flex-wrap: wrap;"
     slot="spacegroup-sunbursts"
   >
     {#if browser}
diff --git a/site/src/routes/preprint/+page.md b/site/src/routes/preprint/+page.md
@@ -241,11 +241,13 @@ We welcome further model submissions at
 
 ## Acknowledgments
 
-Janosh Riebesell acknowledges support from the German Academic Scholarship Foundation ([Studienstiftung](https://wikipedia.org/wiki/Studienstiftung)) and gracious hosting as a visiting affiliate in the groups of Kristin Persson and Anubhav Jain.
+Janosh Riebesell acknowledges support from the German Academic Scholarship Foundation ([Studienstiftung](https://wikipedia.org/wiki/Studienstiftung)).
 
-We would like to thank Jason Blake Gibson, Shyue Ping Ong, Chi Chen, Tian Xie, Bowen Deng, Peichen Zhong, Ekin Dogus Cubuk for helpful discussions. We also thank Hai-Chen Wang and co-authors for creating and freely providing the WBM data set @wang_predicting_2021.
+A big thank you to
 
-Thanks also to [@pbenner](https://github.com/pbenner) for [finding and reporting many bugs]({repo}/issues?q=is%3Aissue+author%3Apbenner+) in the data loading and caching routines prior to the v1 release.
+- Hai-Chen Wang and co-authors for creating and freely providing the WBM data set @wang_predicting_2021
+- Jason Blake Gibson, Shyue Ping Ong, Chi Chen, Tian Xie, Bowen Deng, Peichen Zhong, and Ekin Dogus Cubuk for helpful discussions
+- Philipp Benner ([@pbenner](https://github.com/pbenner)) for [finding and reporting many bugs]({repo}/issues?q=is%3Aissue+author%3Apbenner+) in the data loading routines before the v1 release.
 
 ## Author Contributions
 
diff --git a/site/src/routes/preprint/iclr-ml4mat/+page.md b/site/src/routes/preprint/iclr-ml4mat/+page.md
@@ -140,11 +140,13 @@ We welcome further model submissions as well as data contributions for version 2
 
 ## Acknowledgments
 
-Janosh Riebesell acknowledges support from the German Academic Scholarship Foundation ([Studienstiftung](https://wikipedia.org/wiki/Studienstiftung)) and gracious hosting as a visiting affiliate in the groups of Kristin Persson and Anubhav Jain.
+Janosh Riebesell acknowledges support from the German Academic Scholarship Foundation ([Studienstiftung](https://wikipedia.org/wiki/Studienstiftung)).
 
-We would like to thank Jason Blake Gibson, Shyue Ping Ong, Chi Chen, Tian Xie, Bowen Deng, Peichen Zhong, Ekin Dogus Cubuk for helpful discussions. We also thank Hai-Chen Wang and co-authors for creating and freely providing the WBM data set.
+A big thank you to
 
-Thanks also to [@pbenner](https://github.com/pbenner) for [finding and reporting many bugs]({repo}/issues?q=is%3Aissue+author%3Apbenner+) in the data loading and caching routines prior to the v1 release.
+- Hai-Chen Wang and co-authors for creating and freely providing the WBM data set
+- Jason Blake Gibson, Shyue Ping Ong, Chi Chen, Tian Xie, Bowen Deng, Peichen Zhong, and Ekin Dogus Cubuk for helpful discussions
+- Philipp Benner ([@pbenner](https://github.com/pbenner)) for [finding and reporting many bugs]({repo}/issues?q=is%3Aissue+author%3Apbenner+) in the data loading routines before the v1 release.
 
 ## Author Contributions
 
diff --git a/site/src/routes/si/+page.md b/site/src/routes/si/+page.md
@@ -50,7 +50,7 @@
 {/if}
 
 > @label:fig:cumulative-mae-rmse Cumulative mean absolute error (MAE) and root mean square error (RMSE) during a simulated discovery campaign. This figure expands on the [precision-recall figure](/preprint#fig:cumulative-precision-recall). The $x$-axis again shows number of materials sorted by model-predicted stability or 'campaign length'. This allows the reader to choose a cutoff point given their discovery campaign's resource constraints for validating model predictions and then read off the optimal model given those constraints.
-> CHGNet achieves the lowest regression error profile, with a larger gap to the runner-up model M3GNet than in the precision-recall plots.
+> CHGNet achieves the lowest regression error profile, with a larger gap to the runner-up model M3GNet than in the precision-recall plots. This is likely due to the difference in TPR/TNR trade-off between CHGNet and M3GNet: M3GNet has TNR = 0.80 vs. CHGNet's TNR = 0.87. A higher TNR means a lower false positive rate (FPR), which in turn means lower cumulative MAE and RMSE. Lines end when models stop predicting materials as stable, so these cumulative plots only contain model-predicted positive (stable) materials. Besides the high opportunity cost of false positives, this highlights another reason to prioritize low FPR in discovery models: lower error on the predictions of highest relevance.
 
 ## Model Run Times
 
@@ -134,11 +134,11 @@ We highlight this here to refute the suggestion that training on raw DFT energie
 
 {#if mounted}
 
-<div style="display: flex; gap: 1em; justify-content: space-around;">
+<div style="display: flex; gap: 1em; justify-content: space-around; flex-wrap: wrap;">
 <SpacegroupSunburstWrenformerFailures />
 <SpacegroupSunburstWbm />
 </div>
-<EAboveHullScatterWrenformerFailures style="height: 300; width: 300;" />
+<EAboveHullScatterWrenformerFailures />
 {/if}
 
 > @label:fig:spacegroup-prevalence-wrenformer-failures The left spacegroup sunburst shows spacegroup 71 is by far the dominant lattice symmetry among the 941 Wrenformer failure cases where $E_\text{above hull,DFT} < 1$ and $E_\text{above hull,Wrenformer} > 1$ (points inside the shaded rectangle). On the right side for comparison is the spacegroup sunburst for the entire WBM test set.
diff --git a/site/src/routes/si/largest-error-scatter-select.svelte b/site/src/routes/si/largest-error-scatter-select.svelte
@@ -5,7 +5,7 @@
 
   const figs = import.meta.glob(
     `$figs/scatter-largest-errors-models-mean-vs-true-hull-dist-*.svelte`,
-    { eager: true, import: 'default' }
+    { eager: true, import: `default` }
   )
 
   let selected: string[] = [Object.keys(figs)[0]]

Original file line number	Diff line number	Diff line change
`@@ -122,8 +122,10 @@ img {`
`122`	`122`	`}`
`123`	`123`
`124`	`124`	`table {`
	`125`	`+ display: block;`
	`126`	`+ max-width: 100%;`
	`127`	`+ overflow: scroll;`
`125`	`128`	`border-collapse: collapse;`
`126`		`- width: 100%;`
`127`	`129`	`}`
`128`	`130`	`table :is(td, th) {`
`129`	`131`	`border: 1px solid gray;`
Original file line number	Diff line number	Diff line change
`@@ -107,7 +107,7 @@`
`107`	`107`	`{/if}`
`108`	`108`	`</svelte:fragment>`
`109`	`109`	`<div`
`110`		`- style="display: flex; gap: 1em; justify-content: space-around;"`
	`110`	`+ style="display: flex; gap: 1em; justify-content: space-around; flex-wrap: wrap;"`
`111`	`111`	`slot="spacegroup-sunbursts"`
`112`	`112`	`>`
`113`	`113`	`{#if browser}`
Original file line number	Diff line number	Diff line change
`@@ -5,7 +5,7 @@`
`5`	`5`
`6`	`6`	`const figs = import.meta.glob(`
`7`	`7`	`$figs/scatter-largest-errors-models-mean-vs-true-hull-dist-*.svelte`,
`8`		`- { eager: true, import: 'default' }`
	`8`	+ { eager: true, import: `default` }
`9`	`9`	`)`
`10`	`10`
`11`	`11`	`let selected: string[] = [Object.keys(figs)[0]]`