add 2023-01-18-e-form-scatter-models.png to paper

janosh · janosh · commit d1751a35c0e0 · 2023-06-19T20:29:22.000-07:00
pnpm add -D svelte-preprocess-import-assets (used in svelte.config.js)
decrease katex font-size to 10pt
rename scripts/scatter_e_above_hull_models.py
use subscripts in plotly template quantity_labels
diff --git a/matbench_discovery/__init__.py b/matbench_discovery/__init__.py
@@ -8,6 +8,7 @@
 
 ROOT = os.path.dirname(os.path.dirname(__file__))  # repository root
 FIGS = f"{ROOT}/site/static/figs"  # directory to store figures
+PAPER = f"{ROOT}/site/src/routes/paper/figs"  # directory to store figures used in the paper
 # whether a currently running slurm job is in debug mode
 DEBUG = "DEBUG" in os.environ or (
     "slurm-submit" not in sys.argv and "SLURM_JOB_ID" not in os.environ
diff --git a/matbench_discovery/plots.py b/matbench_discovery/plots.py
@@ -32,14 +32,15 @@
     n_wyckoff="Number of Wyckoff positions",
     n_sites="Lattice site count",
     energy_per_atom="Energy (eV/atom)",
-    e_form="Formation energy (eV/atom)",
-    e_above_hull="Energy above convex hull (eV/atom)",
-    e_above_hull_pred="Predicted energy above convex hull (eV/atom)",
-    e_above_hull_mp="Energy above MP convex hull (eV/atom)",
-    e_above_hull_error="Error in energy above convex hull (eV/atom)",
+    e_form="Actual E<sub>form</sub> (eV/atom)",
+    e_above_hull="E<sub>above hull</sub> (eV/atom)",
+    e_above_hull_mp2020_corrected_ppd_mp="Actual E<sub>above hull</sub> (eV/atom)",
+    e_above_hull_pred="Predicted E<sub>above hull</sub> (eV/atom)",
+    e_above_hull_mp="E<sub>above MP hull</sub> (eV/atom)",
+    e_above_hull_error="Error in E<sub>above hull</sub> (eV/atom)",
     vol_diff="Volume difference (A^3)",
-    e_form_per_atom_mp2020_corrected="Formation energy (eV/atom)",
-    e_form_per_atom_pred="Predicted formation energy (eV/atom)",
+    e_form_per_atom_mp2020_corrected="Actual E<sub>form</sub> (eV/atom)",
+    e_form_per_atom_pred="Predicted E<sub>form</sub> (eV/atom)",
     material_id="Material ID",
     band_gap="Band gap (eV)",
     formula="Formula",
@@ -60,6 +61,7 @@
     margin=dict(l=30, r=20, t=60, b=20),
     paper_bgcolor="rgba(0,0,0,0)",
     # plot_bgcolor="rgba(0,0,0,0)",
+    font_size=15,
 )
 pio.templates["global"] = dict(layout=global_layout)
 pio.templates.default = "plotly_dark+global"
diff --git a/scripts/scatter_e_above_hull_models.py b/scripts/scatter_e_above_hull_models.py
@@ -0,0 +1,156 @@
+# %%
+import numpy as np
+import plotly.graph_objects as go
+from pymatviz.utils import add_identity_line, save_fig
+from sklearn.metrics import r2_score
+
+from matbench_discovery import FIGS, PAPER, today
+from matbench_discovery.data import PRED_FILENAMES, load_df_wbm_with_preds
+from matbench_discovery.energy import classify_stable
+from matbench_discovery.plots import px
+
+__author__ = "Janosh Riebesell"
+__date__ = "2022-11-28"
+
+
+# %%
+print(f"loadable models: {list(PRED_FILENAMES)}")
+models = sorted(
+    "CGCNN, Voronoi RF, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ")
+)
+df_wbm = load_df_wbm_with_preds(models=models).round(3)
+
+e_form_col = "e_form_per_atom_mp2020_corrected"
+e_above_hull_col = "e_above_hull_mp2020_corrected_ppd_mp"
+id_col = "material_id"
+legend = dict(x=1, y=0, xanchor="right", yanchor="bottom", title=None)
+
+
+# %%
+e_form_preds = "e_form_per_atom_pred"
+e_above_hull_preds = "e_above_hull_pred"
+var_name = "Model"
+hover_cols = (id_col, e_form_col, e_above_hull_col, "formula")
+
+df_melt = df_wbm.melt(
+    id_vars=hover_cols,
+    value_vars=models,
+    var_name=var_name,
+    value_name=e_form_preds,
+)
+
+df_melt[e_above_hull_preds] = (
+    df_melt[e_above_hull_col] - df_melt[e_form_col] + df_melt[e_form_preds]
+)
+
+
+# %%
+def _metric_str(model_name: str) -> str:
+    MAE = (df_wbm[e_form_col] - df_wbm[model_name]).abs().mean()
+    R2 = r2_score(*df_wbm[[e_form_col, model_name]].dropna().to_numpy().T)
+    return f"{model_name} · {MAE=:.2} · R<sup>2</sup>={R2:.2}"
+
+
+def _add_metrics_to_legend(fig: go.Figure) -> None:
+    for trace in fig.data:
+        # initially hide all traces, let users select which models to compare
+        trace.visible = "legendonly"
+        # add MAE and R2 to legend
+        model = trace.name
+        trace.name = _metric_str(model)
+
+
+# %% scatter plot of actual vs predicted e_form_per_atom
+fig = px.scatter(
+    df_melt.iloc[::10],
+    x=e_form_col,
+    y=e_form_preds,
+    color=var_name,
+    hover_data=hover_cols,
+    hover_name=id_col,
+)
+
+_add_metrics_to_legend(fig)
+fig.update_layout(legend=legend)
+add_identity_line(fig)
+fig.show()
+
+
+# %%
+img_path = f"{FIGS}/{today}-e-form-scatter-models"
+# fig.write_image(f"{img_path}.pdf")
+save_fig(fig, f"{img_path}.svelte")
+
+
+# %% scatter plot of actual vs predicted e_above_hull
+fig = px.scatter(
+    df_melt.iloc[::10],
+    x=e_above_hull_col,
+    y=e_above_hull_preds,
+    color=var_name,
+    hover_data=hover_cols,
+    hover_name=id_col,
+)
+
+_add_metrics_to_legend(fig)
+fig.update_layout(legend=legend)
+add_identity_line(fig)
+fig.show()
+
+
+# %%
+img_path = f"{FIGS}/{today}-e-above-hull-scatter-models"
+# fig.write_image(f"{img_path}.pdf")
+save_fig(fig, f"{img_path}.svelte")
+
+
+# %% plot all models in separate subplots
+true_pos, false_neg, false_pos, true_neg = classify_stable(
+    df_melt[e_above_hull_col], df_melt[e_above_hull_preds]
+)
+
+df_melt["clf"] = np.array(
+    classes := ["true positive", "false negative", "false positive", "true negative"]
+)[true_pos * 0 + false_neg * 1 + false_pos * 2 + true_neg * 3]
+
+fig = px.scatter(
+    df_melt.iloc[::10],
+    x=e_above_hull_col,
+    y=e_above_hull_preds,
+    facet_col=var_name,
+    facet_col_wrap=3,
+    hover_data=hover_cols,
+    hover_name=id_col,
+    color="clf",
+    color_discrete_map=dict(zip(classes, ("green", "yellow", "red", "blue"))),
+    opacity=0.4,
+)
+
+# iterate over subplots and set new title
+for idx, model in enumerate(models, 1):
+
+    # add MAE and R2 to subplot title
+    MAE = (df_wbm[e_form_col] - df_wbm[model]).abs().mean()
+    R2 = r2_score(*df_wbm[[e_form_col, model]].dropna().to_numpy().T)
+    # find index of annotation belonging to model
+    anno_idx = [a.text for a in fig.layout.annotations].index(f"Model={model}")
+    assert anno_idx >= 0, f"could not find annotation for {model}"
+    # set new title
+    fig.layout.annotations[anno_idx].text = _metric_str(model)
+    # only keep the x-axis title on subplot 2 and the y-axis title on subplot 1
+    if idx != 2:
+        fig.layout[f"xaxis{idx}"].title.text = ""
+    if idx > 1:
+        fig.layout[f"yaxis{idx}"].title.text = ""
+    # add vertical and horizontal lines at 0
+    fig.add_vline(x=0, line=dict(width=1, dash="dash", color="gray"))
+    fig.add_hline(y=0, line=dict(width=1, dash="dash", color="gray"))
+
+id_line = add_identity_line(fig, ret_shape=True)
+fig.update_layout(showlegend=False)
+fig.update_xaxes(nticks=5)
+fig.update_yaxes(nticks=5)
+
+fig.show()
+img_path = f"{PAPER}/{today}-e-form-scatter-models.png"
+save_fig(fig, img_path, scale=4, width=1000, height=500)
diff --git a/site/package.json b/site/package.json
@@ -13,7 +13,7 @@
     "preview": "vite preview",
     "serve": "vite build && vite preview",
     "check": "svelte-check",
-    "make-api-docs": "cd .. && python ../scripts/make_api_docs.py"
+    "make-api-docs": "cd .. && python scripts/make_api_docs.py"
   },
   "devDependencies": {
     "@iconify/svelte": "^3.0.1",
@@ -37,6 +37,7 @@
     "svelte": "^3.55.1",
     "svelte-check": "^3.0.2",
     "svelte-preprocess": "^5.0.0",
+    "svelte-preprocess-import-assets": "^0.2.5",
     "svelte-toc": "^0.5.2",
     "svelte-zoo": "^0.2.1",
     "svelte2tsx": "^0.6.0",
diff --git a/site/src/app.css b/site/src/app.css
@@ -150,3 +150,7 @@ caption {
   display: block;
   margin: 1em auto 2em;
 }
+
+.math {
+  font-size: 10pt;
+}
diff --git a/site/src/routes/+error.svelte b/site/src/routes/+error.svelte
@@ -1,7 +1,7 @@
 <script lang="ts">
   import { page } from '$app/stores'
+  import { homepage, name } from '$site/package.json'
   import Icon from '@iconify/svelte'
-  import { homepage, name } from '../../package.json'
 
   let online: boolean
 </script>
@@ -29,7 +29,7 @@
 
   <p>
     Back to <a href=".">
-      <img src="/favicon.svg" alt={name} height="30" />
+      <img src="$static/favicon.svg" alt={name} height="30" />
       landing page
     </a>.
   </p>
diff --git a/site/src/routes/paper/+page.svx b/site/src/routes/paper/+page.svx
@@ -52,7 +52,7 @@ geometry: margin=3cm # https://stackoverflow.com/a/13516042
   import MetricsTable from '$figs/2022-11-28-metrics-table.svelte'
   import { references } from './references.yaml'
   import { References } from '$lib'
-  import './heading-number.css' // uncomment to remove heading numbers
+  import './heading-number.css' // CSS to auto-number headings
 </script>
 
 # {title}<br><small>{subtitle}</small>
@@ -180,13 +180,15 @@ Our benchmark is designed to make [adding future models easy](/how-to-contribute
   <MetricsTable />
 </div>
 
+![Scatter plots for each model's energy above hull predictions vs DFT ground truth](./figs/2023-01-18-e-form-scatter-models.png)
+
 ## Analysis
 
 ## Conclusion
 
 ## Acknowledgements
 
-JR acknowledges support from the German Academic Scholarship Foundation (Studienstiftung) and gracious hosting as a visiting affiliate in the group of KP.
+JR acknowledges support from the German Academic Scholarship Foundation (Studienstiftung) and gracious hosting as a visiting affiliate in the groups of [KP](https://perssongroup.lbl.gov/people) and [AJ](https://hackingmaterials.lbl.gov).
 
 ## References
 
diff --git a/site/svelte.config.js b/site/svelte.config.js
@@ -6,6 +6,7 @@ import katex from 'rehype-katex-svelte'
 import heading_slugs from 'rehype-slug'
 import math from 'remark-math'
 import preprocess from 'svelte-preprocess'
+import assets from 'svelte-preprocess-import-assets'
 
 const rehypePlugins = [
   katex,
@@ -35,13 +36,14 @@ export default {
 
   preprocess: [
     {
-      // preprocess markdown citations @auth_1st-word-title_yyyy into superscript
-      // links to bibliography items, href must match References.svelte
       markup: (file) => {
         if (file.filename.endsWith(`paper/+page.svx`)) {
+          // preprocess markdown citations @auth_1st-word-title_yyyy into superscript
+          // links to bibliography items, href must match id format in References.svelte
           const code = file.content.replace(
             /@((.+?)_.+?_(\d{4}))/g,
-            `<sup><a href="#$1">$2 $3</a></sup>`
+            (_full_str, bib_id, author, year) =>
+              `<sup><a href="#${bib_id}">${author} ${year}</a></sup>`
           )
           return { code }
         }
@@ -57,6 +59,7 @@ export default {
       remarkPlugins: [math],
       extensions: [`.svx`, `.md`],
     }),
+    assets(),
   ],
 
   kit: {
@@ -65,6 +68,7 @@ export default {
     alias: {
       $site: `.`,
       $root: `..`,
+      $static: `./static`,
       $figs: `./static/figs`,
     },
   },

Original file line number	Diff line number	Diff line change
`@@ -150,3 +150,7 @@ caption {`
`150`	`150`	`display: block;`
`151`	`151`	`margin: 1em auto 2em;`
`152`	`152`	`}`
	`153`	`+`
	`154`	`+.math {`
	`155`	`+ font-size: 10pt;`
	`156`	`+}`