Commit 4013bb1

add descriptions to all model metadata.yml
- use Turbo as better initial color map on ptable heatmaps (heatmap now changeable, required sveriodic-table update)
- add CGCNN+P metrics to model-stats.json
- update model-metrics.svelte table
- compile_metrics.py: import df_metrics, df_wbm from matbench_discovery.preds
- remove dates from figure file names
1 parent f9d4d04 commit 4013bb1


41 files changed: +868 -277 lines

data/wbm/analysis.py

+1 -1

@@ -112,7 +112,7 @@
 fig.update_layout(title=dict(text=title, x=0.5, y=0.95))

 fig.update_layout(showlegend=False, paper_bgcolor="rgba(0,0,0,0)")
-fig.update_xaxes(title_text="WBM energy above MP convex hull (eV/atom)")
+fig.update_xaxes(title="WBM energy above MP convex hull (eV/atom)")

 for x_pos, label in zip(
     [mean, mean + std, mean - std],
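Both spellings are valid plotly API: `update_xaxes(title=...)` accepts a plain string (coerced to `title.text`) or a dict, while `title_text` targets just the text field, so this change is cosmetic. A minimal sketch for reference:

import plotly.express as px

fig = px.histogram(x=[0.1, 0.2, 0.2, 0.5])
# equivalent ways to label the x-axis; `title` also accepts dict(text=..., font=...)
fig.update_xaxes(title="WBM energy above MP convex hull (eV/atom)")
fig.update_xaxes(title_text="WBM energy above MP convex hull (eV/atom)")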

data/wbm/fetch_process_wbm_dataset.py

+5 -4

@@ -21,7 +21,7 @@
 from pymatviz.utils import save_fig
 from tqdm import tqdm

-from matbench_discovery import ROOT, today
+from matbench_discovery import FIGS, ROOT, today
 from matbench_discovery.energy import get_e_form_per_atom
 from matbench_discovery.plots import pio

@@ -436,6 +436,7 @@ def fix_bad_struct_index_mismatch(material_id: str) -> str:
 fig = df_summary.hist(
     x="e_form_per_atom_wbm", backend="plotly", log_y=True, range_x=[-5.5, 5.5]
 )
+fig_compressed = False
 fig.add_vline(x=e_form_cutoff, line=dict(dash="dash"))
 fig.add_vline(x=-e_form_cutoff, line=dict(dash="dash"))
 fig.add_annotation(

@@ -458,13 +459,13 @@ def fix_bad_struct_index_mismatch(material_id: str) -> str:
 # %%
 # no need to store all 250k x values in plot, leads to 1.7 MB file, subsample every 10th
 # point is enough to see the distribution
-if not fig.data[0].compressed:
-    fig.data[0].compressed = True
+if not fig_compressed:
+    fig_compressed = True
     # keep only every 10th data point, round to 3 decimal places to reduce file size
     fig.data[0].x = [round(x, 3) for x in fig.data[0].x[::10]]

 # recommended to upload SVG to vecta.io/nano afterwards for compression
-img_path = f"{module_dir}/2022-12-07-hist-wbm-e-form-per-atom"
+img_path = f"{FIGS}/hist-wbm-e-form-per-atom"
 # save_fig(fig, f"{img_path}.svg", width=800, height=300)
 save_fig(fig, f"{img_path}.svelte")
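The `fig.data[0].compressed` flag had to go because plotly graph objects validate attribute assignment, so setting a made-up `compressed` property on a trace raises an error; a plain module-level flag sidesteps that. A self-contained sketch of the same down-sampling trick, with made-up data:

import plotly.express as px

fig = px.histogram(x=[x / 1000 for x in range(250_000)])
fig_compressed = False  # plain flag since plotly traces reject unknown attributes

if not fig_compressed:
    fig_compressed = True
    # keep every 10th x value, rounded to 3 decimals, to shrink the saved file
    fig.data[0].x = [round(x, 3) for x in fig.data[0].x[::10]]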

data/wbm/readme.md

+2 -2

@@ -25,7 +25,7 @@ The full set of processing steps used to curate the WBM test set from the raw da

 <caption>WBM Formation energy distribution. 524 materials outside green dashed lines were discarded.<br />(zoom out on this plot to see discarded samples)</caption>
 <slot name="hist-e-form-per-atom">
-  <img src="./figs/2022-12-07-hist-wbm-e-form-per-atom.svg" alt="WBM formation energy histogram indicating outlier cutoffs">
+  <img src="./figs/wbm-e-form-per-atom.svg" alt="WBM formation energy histogram indicating outlier cutoffs">
 </slot>

 - apply the [`MaterialsProject2020Compatibility`](https://pymatgen.org/pymatgen.entries.compatibility.html#pymatgen.entries.compatibility.MaterialsProject2020Compatibility) energy correction scheme to the formation energies

@@ -99,5 +99,5 @@ The number of stable materials (according to the MP convex hull which is spanned
 > Note: [According to the authors](https://www.nature.com/articles/s41524-020-00481-6#Sec2), the stability rate w.r.t. the more complete hull constructed from the combined train and test set (MP + WBM) for the first 3 rounds of elemental substitution is 18,479 out of 189,981 crystals ($\approx$ 9.7%).

 <slot name="wbm-each-hist">
-  <img src="./figs/2023-01-26-wbm-each-hist.svg" alt="WBM energy above MP convex hull distribution">
+  <img src="./figs/wbm-each-hist.svg" alt="WBM energy above MP convex hull distribution">
 </slot>
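The energy-correction step this readme references is pymatgen's standard MP2020 scheme. A hedged sketch, assuming `entries` is a list of pymatgen ComputedEntry objects carrying the required calculation metadata:

from pymatgen.entries.compatibility import MaterialsProject2020Compatibility

# applies anion and GGA/GGA+U mixing corrections, dropping incompatible entries
corrected_entries = MaterialsProject2020Compatibility().process_entries(entries)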

matbench_discovery/data.py

+1 -10

@@ -184,11 +184,7 @@ def glob_to_df(


 def load_df_wbm_preds(
-    models: Sequence[str],
-    pbar: bool = True,
-    id_col: str = "material_id",
-    return_model_dfs: bool = False,
-    **kwargs: Any,
+    models: Sequence[str], pbar: bool = True, id_col: str = "material_id", **kwargs: Any
 ) -> pd.DataFrame:
     """Load WBM summary dataframe with model predictions from disk.

@@ -197,8 +193,6 @@ def load_df_wbm_preds(
             matbench_discovery.data.PRED_FILENAMES.
         pbar (bool, optional): Whether to show progress bar. Defaults to True.
         id_col (str, optional): Column to set as df.index. Defaults to "material_id".
-        return_model_dfs (bool, optional): Whether to return dict of dataframes for each
-            model dfs. Defaults to False.
         **kwargs: Keyword arguments passed to glob_to_df().

     Raises:

@@ -218,9 +212,6 @@ def load_df_wbm_preds(
         df = glob_to_df(pattern, pbar=False, **kwargs).set_index(id_col)
         dfs[model_name] = df

-    if return_model_dfs:
-        return dfs
-
     df_out = df_wbm.copy()
     for model_name, df in dfs.items():
         model_key = model_name.lower().replace(" + ", "_").replace(" ", "_")
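With `return_model_dfs` gone, `load_df_wbm_preds()` always returns a single merged dataframe. A hedged usage sketch (the model names are illustrative; valid keys come from PRED_FILENAMES):

from matbench_discovery.data import load_df_wbm_preds

# one row per WBM material, one prediction column per requested model
df_preds = load_df_wbm_preds(["Wrenformer", "MEGNet"], pbar=False)
print(df_preds.columns)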

matbench_discovery/metrics.py

+16 -17

@@ -51,15 +51,17 @@ def classify_stable(


 def stable_metrics(
-    true: Sequence[float], pred: Sequence[float], stability_threshold: float = 0
+    each_true: Sequence[float],
+    each_pred: Sequence[float],
+    stability_threshold: float = 0,
 ) -> dict[str, float]:
     """
     Get a dictionary of stability prediction metrics. Mostly binary classification
     metrics, but also MAE, RMSE and R2.

     Args:
-        true (list[float]): true energy values
-        pred (list[float]): predicted energy values
+        each_true (list[float]): true energy above convex hull
+        each_pred (list[float]): predicted energy above convex hull
         stability_threshold (float): Where to place stability threshold relative to
             convex hull in eV/atom, usually 0 or 0.1 eV. Defaults to 0.

@@ -71,34 +73,31 @@ def stable_metrics(
         dict[str, float]: dictionary of classification metrics with keys DAF, Precision,
             Recall, Accuracy, F1, TPR, FPR, TNR, FNR, MAE, RMSE, R2.
     """
-    true_pos, false_neg, false_pos, true_neg = classify_stable(
-        true, pred, stability_threshold
-    )
-
-    n_true_pos, n_false_pos, n_true_neg, n_false_neg = map(
-        sum, (true_pos, false_pos, true_neg, false_neg)
+    n_true_pos, n_false_neg, n_false_pos, n_true_neg = map(
+        sum, classify_stable(each_true, each_pred, stability_threshold)
     )

     n_total_pos = n_true_pos + n_false_neg
     n_total_neg = n_true_neg + n_false_pos
-    prevalence = n_total_pos / len(true)  # null rate
-    precision = n_true_pos / (n_true_pos + n_false_pos)
+    # prevalence: dummy discovery rate of selecting randomly from all materials
+    prevalence = n_total_pos / len(each_true)
+    precision = n_true_pos / (n_true_pos + n_false_pos)  # model's discovery rate
     recall = n_true_pos / n_total_pos

-    is_nan = np.isnan(true) | np.isnan(pred)
-    true, pred = np.array(true)[~is_nan], np.array(pred)[~is_nan]
+    is_nan = np.isnan(each_true) | np.isnan(each_pred)
+    each_true, each_pred = np.array(each_true)[~is_nan], np.array(each_pred)[~is_nan]

     return dict(
         DAF=precision / prevalence,
         Precision=precision,
         Recall=recall,
-        Accuracy=(n_true_pos + n_true_neg) / len(true),
+        Accuracy=(n_true_pos + n_true_neg) / len(each_true),
         F1=2 * (precision * recall) / (precision + recall),
         TPR=n_true_pos / n_total_pos,
         FPR=n_false_pos / n_total_neg,
         TNR=n_true_neg / n_total_neg,
         FNR=n_false_neg / n_total_pos,
-        MAE=np.abs(true - pred).mean(),
-        RMSE=((true - pred) ** 2).mean() ** 0.5,
-        R2=r2_score(true, pred),
+        MAE=np.abs(each_true - each_pred).mean(),
+        RMSE=((each_true - each_pred) ** 2).mean() ** 0.5,
+        R2=r2_score(each_true, each_pred),
     )
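The renamed arguments make the expected inputs explicit: energies above the convex hull, where values at or below `stability_threshold` count as stable. A toy call with made-up numbers, showing the headline metric DAF = precision / prevalence:

from matbench_discovery.metrics import stable_metrics

metrics = stable_metrics(
    each_true=[-0.1, 0.2, -0.3, 0.4],  # 2 of 4 materials truly stable
    each_pred=[-0.2, 0.3, 0.1, 0.5],  # model recovers 1, with no false positives
)
# precision = 1.0 and prevalence = 0.5, so DAF = 2.0 (2x better than random picking)
print(metrics["DAF"], metrics["Precision"], metrics["Recall"])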

matbench_discovery/plots.py

+18 -14

@@ -20,8 +20,6 @@
 __author__ = "Janosh Riebesell"
 __date__ = "2022-08-05"

-WhichEnergy = Literal["true", "pred"]
-AxLine = Literal["x", "y", "xy", ""]
 Backend = Literal["matplotlib", "plotly"]

 # --- start global plot settings

@@ -58,22 +56,20 @@
 )
 px.defaults.labels = quantity_labels | model_labels

-# https://plotly.com/python-api-reference/generated/plotly.graph_objects.layout
+# color list https://plotly.com/python-api-reference/generated/plotly.graph_objects.layout
 colorway = (
     "lightseagreen",
     "orange",
     "lightsalmon",
     "dodgerblue",
-    "aquamarine",
-    "purple",
-    "firebrick",
 )
 clf_labels = ("True Positive", "False Negative", "False Positive", "True Negative")
-clf_color_map = dict(zip(clf_labels, colorway))
+clf_colors = ("lightseagreen", "orange", "lightsalmon", "dodgerblue")
+clf_color_map = dict(zip(clf_labels, clf_colors))

 global_layout = dict(
     # colorway=px.colors.qualitative.Pastel,
-    colorway=colorway,
+    # colorway=colorway,
     margin=dict(l=30, r=20, t=60, b=20),
     paper_bgcolor="rgba(0,0,0,0)",
     # plot_bgcolor="rgba(0,0,0,0)",

@@ -101,7 +97,7 @@ def hist_classified_stable_vs_hull_dist(
     each_true_col: str,
     each_pred_col: str,
     ax: plt.Axes = None,
-    which_energy: WhichEnergy = "true",
+    which_energy: Literal["true", "pred"] = "true",
     stability_threshold: float | None = 0,
     x_lim: tuple[float | None, float | None] = (-0.7, 0.7),
     rolling_acc: float | None = 0.02,

@@ -133,7 +129,7 @@ def hist_classified_stable_vs_hull_dist(
         (in eV / atom). Same as true energy to convex hull plus predicted minus true
             formation energy.
         ax (plt.Axes, optional): matplotlib axes to plot on.
-        which_energy (WhichEnergy, optional): Whether to use the true (DFT) hull
+        which_energy ('true' | 'pred', optional): Whether to use the true (DFT) hull
             distance or the model's predicted hull distance for the histogram.
         stability_threshold (float, optional): set stability threshold as distance to
             convex hull in eV/atom, usually 0 or 0.1 eV.

@@ -376,7 +372,7 @@ def rolling_mae_vs_hull_dist(

     window_bar_anno = f"rolling window={2 * window * 1000:.0f} meV"
     dummy_mae = (e_above_hull_true - e_above_hull_true.mean()).abs().mean()
-    legend_title = f"dummy MAE = {dummy_mae:.2f} eV/atom"
+    dummy_mae_text = f"dummy MAE = {dummy_mae:.2f} eV/atom"

     if backend == "matplotlib":
         # assert df_rolling_err.isna().sum().sum() == 0, "NaNs in df_rolling_err"

@@ -430,6 +426,9 @@ def rolling_mae_vs_hull_dist(
             horizontalalignment="right",
         )

+        ax.axhline(dummy_mae, color="tab:blue", linestyle="--", linewidth=0.5)
+        ax.text(dummy_mae, 0.1, dummy_mae_text)
+
         ax.text(
             0, 0.13, r"MAE > $|E_\mathrm{above\ hull}|$", horizontalalignment="center"
         )

@@ -456,7 +455,7 @@ def rolling_mae_vs_hull_dist(
         )

         ax.layout.legend.update(
-            title=legend_title,
+            title="",
             x=1,
             y=0,
             xanchor="right",

@@ -484,6 +483,11 @@ def rolling_mae_vs_hull_dist(
             showarrow=False,
             yref="paper",
         )
+        ax.add_hline(
+            y=dummy_mae,
+            line=dict(dash="dash", width=0.5),
+            annotation_text=dummy_mae_text,
+        )
         if show_dft_acc:
             ax.add_scatter(
                 x=(-dft_acc, dft_acc, 0, -dft_acc),

@@ -536,7 +540,7 @@ def cumulative_precision_recall(
     metrics: Sequence[str] = ("Precision", "Recall"),
     stability_threshold: float = 0,  # set stability threshold as distance to convex
     # hull in eV / atom, usually 0 or 0.1 eV
-    project_end_point: AxLine = "xy",
+    project_end_point: Literal["x", "y", "xy", ""] = "xy",
     optimal_recall: str | None = "Optimal Recall",
     show_n_stable: bool = True,
     backend: Backend = "plotly",

@@ -692,7 +696,7 @@ def cumulative_precision_recall(
         **kwargs,
     )

-    line_kwds = dict(color="white", dash="dash", width=0.5)
+    line_kwds = dict(dash="dash", width=0.5)
     for idx, anno in enumerate(fig.layout.annotations):
         anno.text = anno.text.split("=")[1]
         anno.font.size = 16
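The plotly branch now marks the dummy MAE with a dashed horizontal line rather than repurposing the legend title. A standalone sketch of the same add_hline call, with a placeholder figure and value:

import plotly.express as px

fig = px.line(y=[0.05, 0.08, 0.12, 0.09])
dummy_mae = 0.12  # placeholder; in the repo this is the MAD of e_above_hull_true
fig.add_hline(
    y=dummy_mae,
    line=dict(dash="dash", width=0.5),
    annotation_text=f"dummy MAE = {dummy_mae:.2f} eV/atom",
)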

matbench_discovery/preds.py

+8 -10

@@ -2,7 +2,7 @@

 import pandas as pd

-from matbench_discovery.data import load_df_wbm_preds
+from matbench_discovery.data import PRED_FILENAMES, load_df_wbm_preds
 from matbench_discovery.metrics import stable_metrics

 """Centralize data-loading and computing metrics for plotting scripts"""

@@ -18,20 +18,18 @@
 each_true_col = "e_above_hull_mp2020_corrected_ppd_mp"
 each_pred_col = "e_above_hull_pred"

-df_wbm = load_df_wbm_preds(models).round(3)
-
-for col in [e_form_col, each_true_col]:
-    assert col in df_wbm, f"{col=} not in {list(df_wbm)=}"
+df_wbm = load_df_wbm_preds(list(PRED_FILENAMES)).round(3)
+drop_cols = {*PRED_FILENAMES} - {*models}


 df_metrics = pd.DataFrame()
-for model in models:
+for model in list(PRED_FILENAMES):
     df_metrics[model] = stable_metrics(
         df_wbm[each_true_col],
         df_wbm[each_true_col] + df_wbm[model] - df_wbm[e_form_col],
     )

-assert df_metrics.T.MAE.between(0, 0.2).all(), "MAE not in range"
-assert df_metrics.T.R2.between(0.1, 1).all(), "R2 not in range"
-assert df_metrics.T.RMSE.between(0, 0.25).all(), "RMSE not in range"
-assert df_metrics.isna().sum().sum() == 0, "NaNs in metrics"
+
+df_each_pred = pd.DataFrame()
+for model in df_metrics.T.MAE.sort_values().index:
+    df_each_pred[model] = df_wbm[each_true_col] + df_wbm[model] - df_wbm[e_form_col]
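df_each_pred relies on the identity that a formation-energy model shifts the predicted hull distance by exactly its formation-energy error, since the convex hull itself stays fixed. A worked example with made-up numbers:

# each_pred = each_true + (e_form_pred - e_form_true)
e_form_true, e_form_pred, each_true = -1.20, -1.05, 0.03  # eV/atom, made up
each_pred = each_true + e_form_pred - e_form_true
assert round(each_pred, 2) == 0.18  # the 0.15 eV/atom error carries over exactly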

models/bowsr/metadata.yml

+2

@@ -33,4 +33,6 @@ hyperparams:
   n_iter: 100

 notes:
+  description: BOWSR is a Bayesian optimizer with symmetry constraints using a graph deep learning energy model to perform "DFT-free" relaxations of crystal structures.
+  long: The authors show that this iterative approach improves the accuracy of ML-predicted formation energies over single-shot predictions.
   training: Uses same version of MEGNet as standalone MEGNet.

models/cgcnn/metadata.yml

+8

@@ -24,6 +24,10 @@
 hyperparams:
   Ensemble Size: 10

+notes:
+  description: Published in 2017, CGCNN was the first crystal graph convolutional neural network to directly learn 8 different DFT-computed material properties from a graph representing the atoms and bonds in a crystal.
+  long: It showed that, just like in other areas of ML, given large training sets, embeddings that outperform human-engineered features can be learned directly from the data.
+
 - model_name: CGCNN+P
   model_version: 0.1.0 # the aviary version
   matbench_discovery_version: 1.0

@@ -54,3 +58,7 @@
 hyperparams:
   Ensemble Size: 10
   Perturbations: 5
+
+notes:
+  description: This work proposes simple, physically motivated structure perturbations to augment CGCNN's training data of relaxed structures with structures resembling unrelaxed ones but mapped to the same DFT final energy.
+  long: From this, the model should learn to map structures to their nearest energy basin, which is supported by a lowering of the energy error on unrelaxed structures.

models/m3gnet/metadata.yml

+3

@@ -22,6 +22,8 @@
   pandas: 1.5.1
 trained_on_benchmark: false
 notes:
+  description: M3GNet is a GNN-based universal (as in full periodic table) interatomic potential for materials, trained on up to 3-body interactions in the initial, middle and final frames of MP DFT relaxations.
+  long: It thereby learns to emulate structure relaxation, MD simulations and property prediction of materials across diverse chemical spaces.
   training: Using pre-trained model released with paper. Was only trained on a subset of 62,783 MP relaxation trajectories in the 2018 database release (see [related issue](https://github.com/materialsvirtuallab/m3gnet/issues/20#issuecomment-1207087219)).

 - model_name: M3GNet + MEGNet

@@ -58,4 +60,5 @@
   pandas: 1.5.1
 trained_on_benchmark: false
 notes:
+  description: This combination of models uses M3GNet to relax initial structures, which are then passed to MEGNet to predict the formation energy.
   training: Using pre-trained model released with paper. Was only trained on a subset of 62,783 MP relaxation trajectories in the 2018 database release (see [related issue](https://github.com/materialsvirtuallab/m3gnet/issues/20#issuecomment-1207087219)).

models/megnet/metadata.yml

+2

@@ -29,5 +29,7 @@ requirements:
   numpy: 1.24.0
   pandas: 1.5.1
 trained_on_benchmark: false
+
 notes:
+  description: MatErials Graph Network is another GNN for material properties of relaxed structures which showed that learned element embeddings encode periodic chemical trends and can be transfer-learned from large datasets (formation energies) to property predictions on smaller datasets (band gaps, elastic moduli).
   training: Using pre-trained model released with paper. Was only trained on `MP-crystals-2018.6.1` dataset [available on Figshare](https://figshare.com/articles/Graphs_of_materials_project/7451351).

models/voronoi/metadata.yml

+4 -1

@@ -14,11 +14,14 @@ authors:
     orcid: https://orcid.org/0000-0003-2248-474X
 repo: https://github.com/janosh/matbench-discovery
 doi: https://doi.org/10.1103/PhysRevB.96.024104
-preprint: https://arxiv.org/abs/2106.11132
 requirements:
   matminer: 0.8.0
   scikit-learn: 1.1.2
   pymatgen: 2022.10.22
   numpy: 1.24.0
   pandas: 1.5.1
 trained_on_benchmark: true
+
+notes:
+  description: A random forest trained to map a combination of composition-based Magpie features and structure-based relaxation-invariant Voronoi tessellation features (bond angles, coordination numbers, ...) to DFT formation energies.
+  long: This is an old model that predates most deep learning for materials but significantly improved over Coulomb matrix and partial radial distribution function methods. It therefore serves as a good baseline to see what modern ML buys us.

models/wrenformer/metadata.yml

+4

@@ -28,3 +28,7 @@ trained_on_benchmark: true

 hyperparams:
   Ensemble Size: 10
+
+notes:
+  description: Wrenformer is a standard PyTorch Transformer Encoder trained to learn material embeddings from composition, space group and Wyckoff positions in a structure.
+  long: It builds on [Roost](https://doi.org/10.1038/s41467-020-19964-7) and [Wren](https://doi.org/10.1126/sciadv.abn4117) by being a fast structure-free model that is still able to distinguish polymorphs through symmetry.
