add scripts/difficult_structures.py

janosh · janosh · commit db34c092502e · 2023-06-19T20:29:23.000-07:00
diff --git a/models/cgcnn/metadata.yml b/models/cgcnn/metadata.yml
@@ -25,7 +25,7 @@
     Ensemble Size: 10
 
   notes:
-    description: Published in 2017, CGCNN was the first crystal graph convolutional neural network to directly learn 8 different DFT-computed material properties from a graph representing the atoms and bonds in a crystal.
+    description: Published in 2017, CGCNN was the first crystal graph convolutional neural network to directly learn 8 different DFT-computed material properties from a graph representing the atoms and bonds in a crystal. ![Illustration of the crystal graph convolutional neural networks](https://researchgate.net/profile/Tian-Xie-11/publication/320726915/figure/fig1/AS:635258345119746@1528468800829/Illustration-of-the-crystal-graph-convolutional-neural-networks-a-Construction-of-the.png)
     long: It showed that just like in other areas of ML, given large training sets, embeddings that outperform human-engineered features could be learned directly from the data.
 
 - model_name: CGCNN+P
@@ -60,5 +60,5 @@
     Perturbations: 5
 
   notes:
-    description: This work proposes simple, physically motivated structure perturbations to augment CGCNN's training data of relaxed structures with structures resembling unrelaxed ones but mapped to the same DFT final energy.
-    long: From this the model should learn to map structures to their nearest energy basin which is supported by a lowering of the energy error on unrelaxed structures.
+    description: This work proposes simple structure perturbations to augment CGCNN's training data of relaxed structures with randomly perturbed ones resembling unrelaxed structures that are mapped to the same DFT final energy during training. ![Step function PES](https://media.springernature.com/full/springer-static/image/art%3A10.1038%2Fs41524-022-00891-8/MediaObjects/41524_2022_891_Fig1_HTML.png?as=webp)
+    long: The model is essentially taught that the potential energy surface (PES) is a step function that maps each valley to its local minimum. The expectation is that during testing on unrelaxed structures, the model will predict the energy of the nearest basin in the PES. The authors confirm this by demonstrating a lowering of the energy error on unrelaxed structures.
diff --git a/scripts/compile_metrics.py b/scripts/compile_metrics.py
@@ -121,37 +121,36 @@
 
 
 # %%
+higher_is_better = ["DAF", "R²", "Precision", "Recall", "F1", "Accuracy", "TPR", "TNR"]
+lower_is_better = ["MAE", "RMSE", "FNR", "FPR"]
 styler = (
     df_metrics.T.rename(columns={"R2": "R²"})
+    # append arrow up/down to table headers to indicate higher/lower metric is better
+    # .rename(columns=lambda x: x + " ↑" if x in higher_is_better else x + " ↓")
     .style.format(precision=2)
-    .background_gradient(
-        cmap="viridis_r",  # lower is better so reverse color map
-        subset=["MAE", "RMSE", "FNR", "FPR"],
-    )
+    # reverse color map if lower=better
+    .background_gradient(cmap="viridis_r", subset=lower_is_better)
     # .background_gradient(
     #     cmap="viridis_r",
     #     subset=[time_col],
     #     gmap=np.log10(df_stats[time_col].to_numpy()),  # for log scaled color map
     # )
-    .background_gradient(
-        cmap="viridis",  # higher is better
-        subset=["DAF", "R²", "Precision", "Recall", "F1", "Accuracy", "TPR", "TNR"],
-    )
+    .background_gradient(cmap="viridis", subset=higher_is_better)
 )
-
 styles = {
     "": "font-family: sans-serif; border-collapse: collapse;",
-    "td, th": "border: 1px solid #ddd; text-align: left; padding: 8px; white-space: nowrap;",
+    "td, th": "border: none; padding: 4px 6px; white-space: nowrap;",
+    "th": "border: 1px solid; border-width: 1px 0; text-align: left;",
 }
 styler.set_table_styles([dict(selector=sel, props=styles[sel]) for sel in styles])
 styler.set_uuid("")
 
 
 # %% export model metrics as styled HTML table
 # insert svelte {...props} forwarding to the table element
-html = styler.to_html().replace("<table", "<table {...$$props}")
+html_table = styler.to_html().replace("<table", "<table {...$$props}")
 with open(f"{FIGS}/metrics-table.svelte", "w") as file:
-    file.write(html)
+    file.write(html_table)
 
 
 # %%
diff --git a/scripts/difficult_structures.py b/scripts/difficult_structures.py
@@ -0,0 +1,105 @@
+# %%
+import matplotlib.pyplot as plt
+import pandas as pd
+from pymatgen.core import Structure
+from pymatviz import plot_structure_2d, ptable_heatmap_plotly
+
+from matbench_discovery import ROOT
+from matbench_discovery.metrics import classify_stable
+from matbench_discovery.preds import df_each_err, df_each_pred, df_wbm, each_true_col
+
+__author__ = "Janosh Riebesell"
+__date__ = "2023-02-15"
+
+mean_ae_col = "All models mean absolute error (eV/atom)"
+df_each_err[mean_ae_col] = df_wbm[mean_ae_col] = df_each_err.abs().mean(axis=1)
+df_each_err[each_true_col] = df_wbm[each_true_col]  # added last: not a model error
+
+
+# %%
+cse_path = f"{ROOT}/data/wbm/2022-10-19-wbm-computed-structure-entries.json.bz2"
+df_cse = pd.read_json(cse_path).set_index("material_id")
+
+
+# %%
+n_rows, n_cols = 5, 4
+for which in ("best", "worst"):
+    # figsize is (width, height): width scales with n_cols, height with n_rows
+    fig, axs = plt.subplots(n_rows, n_cols, figsize=(3 * n_cols, 4 * n_rows))
+    n_axs = len(axs.flat)
+    # the mean-error column name has spaces/parens so it must be looked up by key
+    # (attribute access like df_each_err.mean_ae raises AttributeError)
+    mean_errs = df_each_err[mean_ae_col]
+    # n_axs structures with the lowest ("best") or highest ("worst") mean error
+    errs = mean_errs.nsmallest(n_axs) if which == "best" else mean_errs.nlargest(n_axs)
+    title = f"{which} {len(errs)} structures (across {len(list(df_each_pred))} models)"
+    fig.suptitle(title, fontsize=16, fontweight="bold", y=0.95)
+
+    for idx, (ax, (mat_id, err)) in enumerate(zip(axs.flat, errs.items()), 1):
+        struct = Structure.from_dict(
+            df_cse.computed_structure_entry.loc[mat_id]["structure"]
+        )
+        plot_structure_2d(struct, ax=ax)
+        _, spg_num = struct.get_space_group_info()
+        formula = struct.composition.reduced_formula
+        ax.set_title(
+            f"{idx}. {formula} (spg={spg_num})\n{mat_id} {err=:.2f}", fontweight="bold"
+        )
+
+    fig.savefig(f"{ROOT}/tmp/figures/{which}-{len(errs)}-structures.webp", dpi=300)
+
+
+# %% plotly scatter plot of largest model errors with points sized by mean error and
+# colored by true stability
+fig = df_wbm.nlargest(200, mean_ae_col).plot.scatter(
+    x=each_true_col,
+    y=mean_ae_col,
+    color=each_true_col,
+    size=mean_ae_col,
+    backend="plotly",
+)
+fig.layout.coloraxis.colorbar.update(
+    title="DFT distance to convex hull (eV/atom)",
+    title_side="top",
+    yanchor="bottom",
+    y=1,
+    xanchor="center",
+    x=0.5,
+    orientation="h",
+    thickness=12,
+)
+fig.show()
+
+
+# %% find materials that were misclassified by all models
+for model in df_each_pred:
+    true_pos, false_neg, false_pos, true_neg = classify_stable(
+        df_each_pred[model], df_wbm[each_true_col]
+    )
+    df_wbm[f"{model}_true_pos"] = true_pos
+    df_wbm[f"{model}_false_neg"] = false_neg
+    df_wbm[f"{model}_false_pos"] = false_pos
+    df_wbm[f"{model}_true_neg"] = true_neg
+
+
+df_wbm["all_true_pos"] = df_wbm.filter(like="_true_pos").all(axis=1)
+df_wbm["all_false_neg"] = df_wbm.filter(like="_false_neg").all(axis=1)
+df_wbm["all_false_pos"] = df_wbm.filter(like="_false_pos").all(axis=1)
+df_wbm["all_true_neg"] = df_wbm.filter(like="_true_neg").all(axis=1)
+
+df_wbm.filter(like="all_").sum()
+
+
+# %%
+ptable_heatmap_plotly(df_wbm[df_wbm.all_false_pos].formula, colorscale="Viridis")
+ptable_heatmap_plotly(df_wbm[df_wbm.all_false_neg].formula, colorscale="Viridis")
+
+
+# %%
+df_each_err.abs().mean().sort_values()
+df_each_err.abs().mean(axis=1).nlargest(25)
+
+
+# %% get mean distance to convex hull for each classification
+df_wbm.query("all_true_pos").describe()
+df_wbm.query("all_false_pos").describe()
diff --git a/scripts/prc_roc_curves_models.py b/scripts/prc_roc_curves_models.py
@@ -66,7 +66,14 @@
     anno.text = anno.text.split("=", 1)[1]  # remove Model= from subplot titles
 
 fig.layout.coloraxis.colorbar.update(
-    x=1, y=1, xanchor="right", yanchor="top", thickness=14, len=0.2, title_side="right"
+    x=1,
+    y=1,
+    xanchor="right",
+    yanchor="top",
+    thickness=14,
+    lenmode="pixels",
+    len=210,
+    title_side="right",
 )
 fig.add_shape(type="line", x0=0, y0=0, x1=1, y1=1, line=line, row="all", col="all")
 fig.add_annotation(text="No skill", x=0.5, y=0.5, showarrow=False, yshift=-10)
diff --git a/site/src/figs/metrics-table.svelte b/site/src/figs/metrics-table.svelte
@@ -4,17 +4,20 @@
   border-collapse: collapse;
 }
 #T_ td {
-  border: 1px solid #ddd;
-  text-align: left;
-  padding: 8px;
+  border: none;
+  padding: 4px 6px;
   white-space: nowrap;
 }
 #T_  th {
-  border: 1px solid #ddd;
-  text-align: left;
-  padding: 8px;
+  border: none;
+  padding: 4px 6px;
   white-space: nowrap;
 }
+#T_ th {
+  border: 1px solid;
+  border-width: 1px 0;
+  text-align: left;
+}
 #T__row0_col0, #T__row0_col2, #T__row0_col3, #T__row0_col5, #T__row0_col10, #T__row1_col7, #T__row1_col8, #T__row3_col4, #T__row3_col6, #T__row3_col9, #T__row5_col1, #T__row5_col11 {
   background-color: #440154;
   color: #f1f1f1;
diff --git a/site/src/routes/about-the-test-set/tmi/+page.svelte b/site/src/routes/about-the-test-set/tmi/+page.svelte
@@ -1,11 +1,7 @@
 <script lang="ts">
   import { ElemCountInset } from '$lib'
-  import {
-    ColorScaleSelect,
-    PeriodicTable,
-    TableInset,
-    type ChemicalElement,
-  } from 'elementari'
+  import type { ChemicalElement } from 'elementari'
+  import { ColorScaleSelect, PeriodicTable, TableInset } from 'elementari'
   import { RadioButtons, Toggle } from 'svelte-zoo'
   import type { Snapshot } from './$types'
 
@@ -27,7 +23,7 @@
   $: color_scale = selected[0]
   $: active_counts = elem_counts[filter]
 
-  const style = `display: flex; gap: 5pt; place-items: center; place-content: center;`
+  const style = `display: flex; place-items: center; place-content: center;`
 
   export const snapshot: Snapshot = {
     capture: () => ({ filter, log }),
@@ -42,8 +38,9 @@ Stuff that didn't make the cut into the main page describing the WBM test set.
 
 <h2>WBM Element Counts for <code>{filter}</code></h2>
 
-Filter WBM element counts by composition arity (how many elements in the formula) or batch
-index (which iteration of elemental substitution the structure was generated in).
+Filter WBM element counts by composition <strong>arity</strong> (how many elements in
+the formula) or <strong>batch index</strong> (which iteration of elemental substitution
+the structure was generated in).
 
 <ColorScaleSelect bind:selected />
 <ul>
@@ -77,11 +74,11 @@ index (which iteration of elemental substitution the structure was generated in)
     display: flex;
     gap: 1ex;
   }
-  strong {
+  ul > li strong {
     background-color: rgba(255, 255, 255, 0.1);
     padding: 3pt 4pt;
   }
-  strong.active {
+  ul > li strong.active {
     background-color: teal;
   }
 </style>