add cumulative F1 score to cumulative precision recall plot

janosh · janosh · commit 6e3627801ab1 · 2023-06-19T20:29:22.000-07:00
update figures/2022-12-05-precision-recall-curves.svelte and render with neg margins to increase width
diff --git a/data/wbm/2022-10-19-hist-e-form-per-atom.png b/data/wbm/2022-10-19-hist-e-form-per-atom.png
diff --git a/data/wbm/readme.md b/data/wbm/readme.md
@@ -20,7 +20,7 @@ The full set of processing steps used to curate the WBM test set from the raw da
 - correctly aligning initial structures to DFT-relaxed [`ComputedStructureEntries`](https://pymatgen.org/pymatgen.entries.computed_entries.html#pymatgen.entries.computed_entries.ComputedStructureEntry)
 - remove 6 pathological structures (with 0 volume)
 - remove formation energy outliers below -5 and above 5 eV/atom (502 and 22 crystals respectively out of 257,487 total, including an anomaly of 500 structures at exactly -10 eV/atom)
-  <!-- ![WBM formation energy histogram indicating outlier cutoffs](2022-12-07-hist-e-form-per-atom.png) -->
+  ![WBM formation energy histogram indicating outlier cutoffs](2022-12-07-hist-e-form-per-atom.png)
 - apply the [`MaterialsProject2020Compatibility`](https://pymatgen.org/pymatgen.entries.compatibility.html#pymatgen.entries.compatibility.MaterialsProject2020Compatibility) energy correction scheme to the formation energies
 - compute energy to the Materials Project convex hull constructed from all MP `ComputedStructureEntries` queried on 2022-09-16 ([database release 2021.05.13](https://docs.materialsproject.org/changes/database-versions#v2021.05.13))
 
diff --git a/matbench_discovery/plots.py b/matbench_discovery/plots.py
@@ -489,8 +489,8 @@ def cumulative_precision_recall(
         tuple[plt.Figure | go.Figure, pd.DataFrame]: The matplotlib/plotly figure and
             dataframe of cumulative metrics for each model.
     """
-    fact = lambda: pd.DataFrame(index=range(len(e_above_hull_true)))
-    dfs = dict(Precision=fact(), Recall=fact())
+    factory = lambda: pd.DataFrame(index=range(len(e_above_hull_true)))
+    dfs = dict(Precision=factory(), Recall=factory(), F1=factory())
 
     for model_name in df_preds:
         model_preds = df_preds[model_name].sort_values()
@@ -502,36 +502,43 @@ def cumulative_precision_recall(
 
         true_pos_cumsum = true_pos.cumsum()
         # precision aka positive predictive value (PPV)
-        precision = true_pos_cumsum / (true_pos_cumsum + false_pos.cumsum())
+        precision_cum = true_pos_cumsum / (true_pos_cumsum + false_pos.cumsum())
         n_total_pos = sum(true_pos) + sum(false_neg)
-        recall = true_pos_cumsum / n_total_pos  # aka true_pos_rate aka sensitivity
+        recall_cum = true_pos_cumsum / n_total_pos  # aka true_pos_rate aka sensitivity
 
-        end = int(np.argmax(recall))
+        end = int(np.argmax(recall_cum))
         xs = np.arange(end)
 
-        prec_interp = scipy.interpolate.interp1d(xs, precision[:end], kind="cubic")
-        recall_interp = scipy.interpolate.interp1d(xs, recall[:end], kind="cubic")
+        # cumulative F1 score
+        f1_cum = 2 * (precision_cum * recall_cum) / (precision_cum + recall_cum)
+
+        prec_interp = scipy.interpolate.interp1d(xs, precision_cum[:end], kind="cubic")
+        recall_interp = scipy.interpolate.interp1d(xs, recall_cum[:end], kind="cubic")
+        f1_interp = scipy.interpolate.interp1d(xs, f1_cum[:end], kind="cubic")
         dfs["Precision"][model_name] = pd.Series(prec_interp(xs))
         dfs["Recall"][model_name] = pd.Series(recall_interp(xs))
+        dfs["F1"][model_name] = pd.Series(f1_interp(xs))
 
     for key, df in dfs.items():
         # drop all-NaN rows so plotly plot x-axis only extends to largest number of
         # predicted materials by any model
         df.dropna(how="all", inplace=True)
+        # will be used as facet_col in plotly to split different metrics into subplots
         df["metric"] = key
 
     df_cum = pd.concat(dfs.values())
+    # subselect rows for speed, plot has sufficient precision with 1k rows
+    df_cum = df_cum.iloc[:: len(df_cum) // 1000 or 1]
 
     if backend == "matplotlib":
-        fig, axs = plt.subplots(1, 2, figsize=(15, 7), sharey=True)
+        fig, axs = plt.subplots(1, len(dfs), figsize=(15, 7), sharey=True)
         line_kwargs = dict(
             linewidth=3, markevery=[-1], marker="x", markersize=14, markeredgewidth=2.5
         )
         for (key, df), ax in zip(dfs.items(), axs):
             # select every n-th row of df so that 1000 rows are left for increased
             # plotting speed and reduced file size
             # falls back on every row if df has fewer than 1000 rows
-
             df.iloc[:: len(df) // 1000 or 1].plot(
                 ax=ax, legend=False, backend=backend, **line_kwargs | kwargs, ylabel=key
             )
@@ -541,9 +548,12 @@ def cumulative_precision_recall(
         bbox = dict(facecolor="white", alpha=0.5, edgecolor="none")
         assert len(axs) == len(dfs), f"{len(axs)} != {len(dfs)}"
 
-        for ax, df in zip(axs, dfs.values()):
+        for ax, (key, df) in zip(axs.flat, dfs.items()):
             ax.set(ylim=(0, 1), xlim=(0, None), ylabel=key)
             for model in df_preds:
+                # TODO is this if really necessary?
+                if len(df[model].dropna()) == 0:
+                    continue
                 x_end = df[model].dropna().index[-1]
                 y_end = df[model].dropna().iloc[-1]
                 # place model name at the end of every line
@@ -556,11 +566,12 @@ def cumulative_precision_recall(
         # optimal recall line finds all stable materials without any false positives
         # can be included to confirm all models start out of with near optimal recall
         # and to see how much each model overshoots total n_stable
-        n_below_hull = sum(e_above_hull_true < 0)
         if show_optimal:
+            ax = next(filter(lambda ax: ax.get_ylabel() == "Recall", axs.flat))
+            n_below_hull = sum(e_above_hull_true < 0)
             opt_label = "Optimal Recall"
-            axs[1].plot([0, n_below_hull], [0, 1], color="green", linestyle="--")
-            axs[1].text(
+            ax.plot([0, n_below_hull], [0, 1], color="green", linestyle="--")
+            ax.text(
                 *[n_below_hull, 0.81],
                 opt_label,
                 color="green",
@@ -571,16 +582,29 @@ def cumulative_precision_recall(
             )
 
     elif backend == "plotly":
-        fig = df_cum.iloc[:: len(df_cum) // 1000 or 1].plot(
-            backend=backend, facet_col="metric", **kwargs
+        fig = df_cum.plot(
+            backend=backend,
+            facet_col="metric",
+            facet_col_wrap=3,
+            facet_col_spacing=0.03,
+            # pivot df in case we want to show all 3 metrics in each plot's hover
+            # requires fixing index mismatch due to df subsampling above
+            # customdata=dict(
+            #     df_cum.reset_index()
+            #     .pivot(index="index", columns="metric")["Voronoi RF above hull pred"]
+            #     .items()
+            # ),
+            **kwargs,
         )
         fig.update_traces(line=dict(width=4))
-        for idx in range(1, 3):
-            fig.update_xaxes(
-                title_text="Number of materials predicted stable", row=1, col=idx
+        for idx, metric in enumerate(df_cum.metric.unique(), 1):
+            x_axis_label = "Number of materials predicted stable" if idx == 2 else ""
+            fig.update_xaxes(title=x_axis_label, col=idx)
+            fig.update_yaxes(title=dict(text=metric, standoff=0), col=idx)
+            fig.update_traces(
+                hovertemplate=f"Index = %{{x:d}}<br>{metric} = %{{y:.2f}}",
+                col=idx,  # model = %{customdata[0]}<br>
             )
-        fig.update_yaxes(title="Precision", col=1)
-        fig.update_yaxes(title="Recall", col=2)
         fig.for_each_annotation(lambda a: a.update(text=""))
         fig.update_layout(legend=dict(title=""))
         fig.update_layout(showlegend=False)
diff --git a/scripts/precision_recall.py b/scripts/precision_recall.py
@@ -1,5 +1,5 @@
 # %%
-from sklearn.metrics import f1_score
+import pandas as pd
 
 from matbench_discovery import ROOT, today
 from matbench_discovery.data import load_df_wbm_with_preds
@@ -11,8 +11,8 @@
 
 # %%
 models = (
-    "Wren, CGCNN IS2RE, CGCNN RS2RE, Voronoi RF, "
-    "Wrenformer, MEGNet, M3GNet, BOWSR MEGNet"
+    # Wren, CGCNN IS2RE, CGCNN RS2RE
+    "Voronoi RF, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet"
 ).split(", ")
 
 df_wbm = load_df_wbm_with_preds(models=models).round(3)
@@ -23,21 +23,19 @@
 
 
 # %%
+df_e_above_hull_pred = pd.DataFrame()
 for model in models:
-    pred_col = f"{model}_e_form"
-    F1 = f1_score(df_wbm[e_above_hull_col] < 0, df_wbm[model] < 0)
-    plot_label = f"{model} {F1=:.2}"
-    df_wbm[plot_label] = df_wbm[e_above_hull_col] + df_wbm[model] - df_wbm[target_col]
+    e_above_hul_pred = df_wbm[e_above_hull_col] + df_wbm[model] - df_wbm[target_col]
+    df_e_above_hull_pred[model] = e_above_hul_pred
 
 fig, df_metric = cumulative_precision_recall(
     e_above_hull_true=df_wbm[e_above_hull_col],
-    df_preds=df_wbm.filter(like="F1="),
+    df_preds=df_e_above_hull_pred,
     project_end_point="xy",
     backend=(backend := "plotly"),
     show_optimal=True,
 )
 
-
 title = f"{today} - Cumulative Precision and Recall for Stable Materials"
 # xlabel_cumulative = "Materials predicted stable sorted by hull distance"
 if backend == "matplotlib":
@@ -46,7 +44,6 @@
 elif backend == "plotly":
     fig.update_layout(title=title)
 
-
 fig.show()
 
 
diff --git a/site/.eslintrc.yml b/site/.eslintrc.yml
@@ -18,7 +18,7 @@ settings:
   svelte3/typescript: true
 rules:
   indent: [error, 2, SwitchCase: 1]
-  # '@typescript-eslint/quotes': [error, backtick, avoidEscape: true]
+  '@typescript-eslint/quotes': [error, backtick, avoidEscape: true]
   semi: [error, never]
   linebreak-style: [error, unix]
   no-console: [error, allow: [warn, error]]
diff --git a/site/src/routes/+layout.svelte b/site/src/routes/+layout.svelte
@@ -7,7 +7,7 @@
   import '../app.css'
 
   const routes = Object.keys(import.meta.glob(`./*/+page.{svx,svelte,md}`)).map(
-    (filename) => '/' + filename.split(`/`)[1]
+    (filename) => `/` + filename.split(`/`)[1]
   )
 </script>
 
diff --git a/site/src/routes/+page.svelte b/site/src/routes/+page.svelte
@@ -1,10 +1,10 @@
 <script lang="ts">
-  import Plot from '$root/figures/2022-12-05-precision-recall-curves.svelte'
+  import Plot from '$root/figures/2022-12-25-precision-recall-curves.svelte'
   import Readme from '$root/readme.md'
 </script>
 
 <Readme />
 
-{#if typeof document !== 'undefined'}
-  <Plot />
+{#if typeof document !== `undefined`}
+  <Plot style="margin: 0 -5vw;" />
 {/if}
diff --git a/site/src/routes/about-the-data/+page.svelte b/site/src/routes/about-the-data/+page.svelte
@@ -0,0 +1,29 @@
+<script lang="ts">
+  import DataReadme from '$root/data/wbm/readme.md'
+  import { onMount } from 'svelte'
+
+  const figs = import.meta.glob(`$root/data/wbm/*.{png,svg,pdf}`, {
+    eager: true,
+    as: `url`,
+  })
+
+  onMount(() => {
+    for (const img of document.querySelectorAll(`img`)) {
+      const src = img.getAttribute(`src`)
+      if (figs[`../data/wbm/${src}`]) {
+        img.src = figs[`../data/wbm/${src}`]
+      }
+    }
+  })
+</script>
+
+<main>
+  <DataReadme />
+</main>
+
+<style>
+  :global(img) {
+    max-width: 100%;
+    margin: 1em auto;
+  }
+</style>
diff --git a/site/svelte.config.js b/site/svelte.config.js
@@ -33,5 +33,9 @@ export default {
 
   kit: {
     adapter: adapter(),
+
+    prerender: {
+      handleHttpError: `warn`,
+    },
   },
 }
diff --git a/tests/test_plots.py b/tests/test_plots.py
@@ -44,11 +44,12 @@ def test_cumulative_precision_recall(
 
     if backend == "matplotlib":
         assert isinstance(fig, plt.Figure)
-        ax1, ax2 = fig.axes
-        assert ax1.get_ylim() == ax2.get_ylim() == (0, 1)
-        assert ax1.get_ylabel() == "Recall"
-        # TODO ax2 ylabel also 'Recall', should be 'Precision'
-        # assert ax2.get_ylabel() == "Precision"
+        assert all(ax.get_ylim() == (0, 1) for ax in fig.axes)
+        assert (
+            [ax.get_ylabel() for ax in fig.axes]
+            == list(df_metrics.metric.unique())
+            == ["Precision", "Recall", "F1"]
+        )
     elif backend == "plotly":
         assert isinstance(fig, go.Figure)
         assert fig.layout.yaxis1.title.text == "Precision"

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@`
`7`	`7`	`import '../app.css'`
`8`	`8`
`9`	`9`	const routes = Object.keys(import.meta.glob(`./*/+page.{svx,svelte,md}`)).map(
`10`		- (filename) => '/' + filename.split(`/`)[1]
	`10`	+ (filename) => `/` + filename.split(`/`)[1]
`11`	`11`	`)`
`12`	`12`	`</script>`
`13`	`13`
Original file line number	Diff line number	Diff line change
`@@ -33,5 +33,9 @@ export default {`
`33`	`33`
`34`	`34`	`kit: {`
`35`	`35`	`adapter: adapter(),`
	`36`	`+`
	`37`	`+ prerender: {`
	`38`	+ handleHttpError: `warn`,
	`39`	`+ },`
`36`	`40`	`},`
`37`	`41`	`}`