janosh
diff --git a/‎matbench_discovery/data.py
+3-3 b/‎matbench_discovery/data.py
+3-3
diff --git a/‎matbench_discovery/metrics.py
+4-2 b/‎matbench_discovery/metrics.py
+4-2
diff --git a/‎models/bowsr/metadata.yml
+1-1 b/‎models/bowsr/metadata.yml
+1-1
diff --git a/‎models/cgcnn/metadata.yml
+52-24 b/‎models/cgcnn/metadata.yml
+52-24
diff --git a/‎models/cgcnn/train_cgcnn.py
+4-4 b/‎models/cgcnn/train_cgcnn.py
+4-4
diff --git a/‎models/m3gnet/metadata.yml
+56-24 b/‎models/m3gnet/metadata.yml
+56-24
diff --git a/‎scripts/compile_metrics.py
+15-17 b/‎scripts/compile_metrics.py
+15-17
diff --git a/‎scripts/rolling_mae_vs_hull_dist.py
+1-1 b/‎scripts/rolling_mae_vs_hull_dist.py
+1-1
diff --git a/‎site/src/app.css
-1 b/‎site/src/app.css
-1
@@ -144,8 +144,8 @@ def load_train_test(
     "Wrenformer": "wrenformer/2022-11-15-wrenformer-IS2RE-preds.csv",
     "MEGNet": "megnet/2022-11-18-megnet-wbm-IS2RE/megnet-e-form-preds.csv",
     "M3GNet": "m3gnet/2022-10-31-m3gnet-wbm-IS2RE.csv",
-    "M3GNet MEGNet": "m3gnet/2022-10-31-m3gnet-wbm-IS2RE.csv",
-    "BOWSR MEGNet": "bowsr/2023-01-23-bowsr-megnet-wbm-IS2RE.csv",
+    "M3GNet + MEGNet": "m3gnet/2022-10-31-m3gnet-wbm-IS2RE.csv",
+    "BOWSR + MEGNet": "bowsr/2023-01-23-bowsr-megnet-wbm-IS2RE.csv",
 }
 
 
@@ -222,7 +222,7 @@ def load_df_wbm_preds(
 
     df_out = df_wbm.copy()
     for model_name, df in dfs.items():
-        model_key = model_name.lower().replace(" ", "_")
+        model_key = model_name.lower().replace(" + ", "_").replace(" ", "_")
         if f"e_form_per_atom_{model_key}" in df:
             df_out[model_name] = df[f"e_form_per_atom_{model_key}"]
 
 
@@ -1,5 +1,7 @@
 """Centralize data-loading and computing metrics for plotting scripts"""
 
+from __future__ import annotations
+
 from collections.abc import Sequence
 
 import numpy as np
@@ -99,8 +101,8 @@ def stable_metrics(
 
 
 models = sorted(
-    "Wrenformer, CGCNN, Voronoi Random Forest, MEGNet, M3GNet "
-    "MEGNet, BOWSR MEGNet".split(", ")
+    "Wrenformer, CGCNN, Voronoi Random Forest, MEGNet, M3GNet + MEGNet, "
+    "BOWSR + MEGNet".split(", ")
 )
 e_form_col = "e_form_per_atom_mp2020_corrected"
 each_true_col = "e_above_hull_mp2020_corrected_ppd_mp"
 
@@ -1,4 +1,4 @@
-model_name: BOWSR MEGNet
+model_name: BOWSR + MEGNet
 model_version: 2022.9.20
 matbench_discovery_version: 1.0
 date_added: "2022-11-17"
 
@@ -1,25 +1,53 @@
-model_name: CGCNN
-model_version: 0.1.0 # the aviary version
-matbench_discovery_version: 1.0
-date_added: "2022-12-28"
-authors:
-  - name: Tian Xie
-    email: [email protected]
-    affiliation: Massachusetts Institute of Technology
-    url: https://txie.me
-  - name: Jeffrey C. Grossman
-    affiliation: Massachusetts Institute of Technology
-    url: https://dmse.mit.edu/people/jeffrey-c-grossman
-repo: https://github.com/txie-93/cgcnn
-doi: https://doi.org/10.1103/PhysRevLett.120.145301
-preprint: https://arxiv.org/abs/1710.10324
-requirements:
-  aviary: 0.1.0
-  torch: 1.11.0
-  torch-scatter: 2.0.9
-  numpy: 1.24.0
-  pandas: 1.5.1
-trained_on_benchmark: true
+- model_name: CGCNN
+  model_version: 0.1.0 # the aviary version
+  matbench_discovery_version: 1.0
+  date_added: "2022-12-28"
+  authors:
+    - name: Tian Xie
+      email: [email protected]
+      affiliation: Massachusetts Institute of Technology
+      url: https://txie.me
+    - name: Jeffrey C. Grossman
+      affiliation: Massachusetts Institute of Technology
+      url: https://dmse.mit.edu/people/jeffrey-c-grossman
+  repo: https://github.com/txie-93/cgcnn
+  doi: https://doi.org/10.1103/PhysRevLett.120.145301
+  preprint: https://arxiv.org/abs/1710.10324
+  requirements:
+    aviary: 0.1.0
+    torch: 1.11.0
+    torch-scatter: 2.0.9
+    numpy: 1.24.0
+    pandas: 1.5.1
+  trained_on_benchmark: true
 
-hyperparams:
-  Ensemble Size: 10
+  hyperparams:
+    Ensemble Size: 10
+
+- model_name: CGCNN+P
+  model_version: 0.1.0 # the aviary version
+  matbench_discovery_version: 1.0
+  date_added: "2023-02-03"
+  authors:
+    - name: Jason B. Gibson
+      affiliation: University of Florida
+    - name: Ajinkya C. Hire
+      affiliation: University of Florida
+    - name: Richard G. Hennig
+      affiliation: University of Florida
+      url: https://hennig.mse.ufl.edu
+      email: [email protected]
+  repo: https://github.com/JasonGibsonUfl/Augmented_CGCNN
+  doi: https://doi.org/10.1038/s41524-022-00891-8
+  preprint: https://arxiv.org/abs/2202.13947
+  requirements:
+    aviary: 0.1.0
+    torch: 1.11.0
+    torch-scatter: 2.0.9
+    numpy: 1.24.0
+    pandas: 1.5.1
+  trained_on_benchmark: true
+
+  hyperparams:
+    Ensemble Size: 10
+    Perturbations: 5
@@ -28,9 +28,9 @@
 target_col = "formation_energy_per_atom"
 input_col = "structure"
 id_col = "material_id"
-augment = 0  # 0 for no augmentation, n>1 means train on n perturbations of each crystal
+perturb = 0  # 0 for no perturbation, n>=1 means train on n perturbations of each crystal
 # in the training set all assigned the same original target energy
-job_name = f"train-cgcnn-robust-{augment=}{'-debug' if DEBUG else ''}"
+job_name = f"train-cgcnn-robust-{perturb=}{'-debug' if DEBUG else ''}"
 print(f"{job_name=}")
 robust = "robust" in job_name.lower()
 ensemble_size = 10
@@ -67,7 +67,7 @@
 
 df_aug = df.copy()
 structs = df_aug.pop(input_col)
-for idx in trange(augment, desc="Augmenting"):
+for idx in trange(perturb, desc="Generating perturbed structures"):
     df_aug[input_col] = [perturb_structure(x) for x in structs]
     df = pd.concat([df, df_aug.set_index(f"{x}-aug={idx+1}" for x in df_aug.index)])
 
@@ -108,7 +108,7 @@
     train_df=dict(shape=str(train_data.df.shape), columns=", ".join(train_df)),
     test_df=dict(shape=str(test_data.df.shape), columns=", ".join(test_df)),
     slurm_vars=slurm_vars,
-    augment=augment,
+    perturb=perturb,
     input_col=input_col,
 )
 
 
@@ -1,24 +1,56 @@
-model_name: M3GNet
-model_version: 2022.9.20
-matbench_discovery_version: 1.0
-date_added: "2022-09-20"
-authors:
-  - name: Chi Chen
-    affiliation: UC San Diego
-    role: Model
-  - name: Shyue Ping Ong
-    affiliation: UC San Diego
-    orcid: https://orcid.org/0000-0001-5726-2587
-    email: [email protected]
-repo: https://github.com/materialsvirtuallab/m3gnet
-url: https://materialsvirtuallab.github.io/m3gnet
-doi: https://doi.org/10.1038/s43588-022-00349-3
-preprint: https://arxiv.org/abs/2202.02450
-requirements:
-  m3gnet: 0.1.0
-  pymatgen: 2022.10.22
-  numpy: 1.24.0
-  pandas: 1.5.1
-trained_on_benchmark: false
-notes:
-  training: Using pre-trained model released with paper. Was only trained on a subset of 62,783 MP relaxation trajectories in the 2018 database release (see [related issue](https://github.com/materialsvirtuallab/m3gnet/issues/20#issuecomment-1207087219)).
+- model_name: M3GNet
+  model_version: 2022.9.20
+  matbench_discovery_version: 1.0
+  date_added: "2022-09-20"
+  authors:
+    - name: Chi Chen
+      affiliation: UC San Diego
+      role: Model
+    - name: Shyue Ping Ong
+      affiliation: UC San Diego
+      orcid: https://orcid.org/0000-0001-5726-2587
+      email: [email protected]
+  repo: https://github.com/materialsvirtuallab/m3gnet
+  url: https://materialsvirtuallab.github.io/m3gnet
+  doi: https://doi.org/10.1038/s43588-022-00349-3
+  preprint: https://arxiv.org/abs/2202.02450
+  requirements:
+    m3gnet: 0.1.0
+    pymatgen: 2022.10.22
+    numpy: 1.24.0
+    pandas: 1.5.1
+  trained_on_benchmark: false
+  notes:
+    training: Using pre-trained model released with paper. Was only trained on a subset of 62,783 MP relaxation trajectories in the 2018 database release (see [related issue](https://github.com/materialsvirtuallab/m3gnet/issues/20#issuecomment-1207087219)).
+
+- model_name: M3GNet + MEGNet
+  model_version: 2022.9.20
+  matbench_discovery_version: 1.0
+  date_added: "2023-02-03"
+  authors:
+    - name: Chi Chen
+      affiliation: UC San Diego
+      role: Model
+    - name: Weike Ye
+      affiliation: UC San Diego
+    - name: Yunxing Zuo
+      affiliation: UC San Diego
+    - name: Chen Zheng
+      affiliation: UC San Diego
+    - name: Shyue Ping Ong
+      affiliation: UC San Diego
+      orcid: https://orcid.org/0000-0001-5726-2587
+      email: [email protected]
+  repo: https://github.com/materialsvirtuallab/m3gnet
+  url: https://materialsvirtuallab.github.io/m3gnet
+  doi: https://doi.org/10.1038/s43588-022-00349-3
+  preprint: https://arxiv.org/abs/2202.02450
+  requirements:
+    m3gnet: 0.1.0
+    megnet: 1.3.2
+    pymatgen: 2022.10.22
+    numpy: 1.24.0
+    pandas: 1.5.1
+  trained_on_benchmark: false
+  notes:
+    training: Using pre-trained model released with paper. Was only trained on a subset of 62,783 MP relaxation trajectories in the 2018 database release (see [related issue](https://github.com/materialsvirtuallab/m3gnet/issues/20#issuecomment-1207087219)).
@@ -21,6 +21,7 @@
 
 
 # %%
+model_stats: dict[str, dict[str, str | int | float]] = {}
 models: dict[str, dict[str, Any]] = {
     "CGCNN": dict(
         n_runs=10,
@@ -57,7 +58,7 @@
             display_name={"$regex": "m3gnet-wbm-IS2RE"},
         ),
     ),
-    "BOWSR MEGNet": dict(
+    "BOWSR + MEGNet": dict(
         n_runs=500,
         filters=dict(
             created_at={"$gt": "2023-01-20", "$lt": "2023-01-22"},
@@ -66,15 +67,14 @@
     ),
 }
 
-assert set(models) == set(PRED_FILENAMES), f"{set(models)=} != {set(PRED_FILENAMES)=}"
-
-
-model_stats: dict[str, dict[str, str | int | float]] = {}
+assert not (
+    unknown_models := set(models) - set(PRED_FILENAMES)
+), f"{unknown_models=} missing predictions file"
 
 
 # %% calculate total model run times from wandb logs
 # NOTE these model run times are pretty meaningless since some models were run on GPU
-# (Wrenformer and CGCNN), others on CPU. Also BOWSR MEGNet, M3GNet and MEGNet weren't
+# (Wrenformer and CGCNN), others on CPU. Also BOWSR + MEGNet, M3GNet and MEGNet weren't
 # trained from scratch. Their run times only indicate the time needed to predict the
 # test set.
 
@@ -110,24 +110,23 @@
     title=f"Run time distribution for {model}", xlabel="Run time [h]", ylabel="Count"
 )
 
+model_stats["M3GNet + MEGNet"] = model_stats["M3GNet"].copy()
+model_stats["M3GNet + MEGNet"][time_col] = (
+    model_stats["MEGNet"][time_col] + model_stats["M3GNet"][time_col]  # type: ignore
+)
+
 df_metrics = pd.DataFrame(model_stats).T
 df_metrics.index.name = "Model"
-# on 2022-11-28:
-# run_times = {'Voronoi Random Forest': 739608,
-#  'Wrenformer': 208399,
-#  'MEGNet': 12396,
-#  'M3GNet': 301138,
-#  'BOWSR MEGNet': 9105237}
 
 
 # %%
-df_wbm = load_df_wbm_preds(list(models))
+df_wbm = load_df_wbm_preds(list(model_stats))
 e_form_col = "e_form_per_atom_mp2020_corrected"
 each_true_col = "e_above_hull_mp2020_corrected_ppd_mp"
 
 
 # %%
-for model in models:
+for model in model_stats:
     each_pred = df_wbm[each_true_col] + df_wbm[model] - df_wbm[e_form_col]
 
     metrics = stable_metrics(df_wbm[each_true_col], each_pred)
@@ -165,12 +164,11 @@
 }
 df_styled.set_table_styles([dict(selector=sel, props=styles[sel]) for sel in styles])
 
-html_path = f"{FIGS}/{today}-metrics-table.svelte"
-df_styled.to_html(html_path)
+# df_styled.to_html(f"{FIGS}/{today}-metrics-table.svelte")
 
 
 # %% write model metrics to json for use by the website
-df_metrics["missing_preds"] = df_wbm[list(models)].isna().sum()
+df_metrics["missing_preds"] = df_wbm[list(model_stats)].isna().sum()
 df_metrics["missing_percent"] = [
     f"{x / len(df_wbm):.2%}" for x in df_metrics.missing_preds
 ]
 
@@ -9,7 +9,7 @@
 
 # %%
 # model = "Wrenformer"
-model = "M3GNet MEGNet"
+model = "M3GNet + MEGNet"
 ax, df_err, df_std = rolling_mae_vs_hull_dist(
     e_above_hull_true=df_wbm[each_true_col],
     e_above_hull_errors={model: df_wbm[e_form_col] - df_wbm[model]},
 
@@ -81,7 +81,6 @@ ul {
   padding-left: 1em;
 }
 label {
-  font-weight: bold;
   cursor: pointer;
 }
 img {
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-model_name: BOWSR MEGNet`
	`1`	`+model_name: BOWSR + MEGNet`
`2`	`2`	`model_version: 2022.9.20`
`3`	`3`	`matbench_discovery_version: 1.0`
`4`	`4`	`date_added: "2022-11-17"`
Original file line number	Diff line number	Diff line change
`@@ -81,7 +81,6 @@ ul {`
`81`	`81`	`padding-left: 1em;`
`82`	`82`	`}`
`83`	`83`	`label {`
`84`		`- font-weight: bold;`
`85`	`84`	`cursor: pointer;`
`86`	`85`	`}`
`87`	`86`	`img {`