add precision_recall_vs_calc_count() to plot_funcs.py extracted from plot_scripts/precision_recall_vs_calc_count.py

janosh · janosh · commit 7ed1b09da288 · 2023-06-19T20:29:21.000-07:00
diff --git a/mb_discovery/plot_scripts/plot_funcs.py b/mb_discovery/plot_scripts/plot_funcs.py
@@ -5,8 +5,9 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+import scipy.interpolate
+import scipy.stats
 from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
-from scipy.stats import sem as std_err_of_mean
 
 
 __author__ = "Janosh Riebesell"
@@ -162,7 +163,7 @@ def rolling_mae_vs_hull_dist(
     if ax is None:
         ax = plt.gca()
 
-    ax_is_fresh = len(ax.lines) == 0
+    is_fresh_ax = len(ax.lines) == 0
 
     bins = np.arange(*x_lim, increment)
 
@@ -175,15 +176,15 @@ def rolling_mae_vs_hull_dist(
 
         mask = (df[e_above_hull_col] <= high) & (df[e_above_hull_col] > low)
         rolling_maes[idx] = df[residual_col].loc[mask].abs().mean()
-        rolling_stds[idx] = std_err_of_mean(df[residual_col].loc[mask].abs())
+        rolling_stds[idx] = scipy.stats.sem(df[residual_col].loc[mask].abs())
 
     ax.plot(bins, rolling_maes, **kwargs)
 
     ax.fill_between(
         bins, rolling_maes + rolling_stds, rolling_maes - rolling_stds, alpha=0.3
     )
 
-    if not ax_is_fresh:
+    if not is_fresh_ax:
         # return earlier if all plot objects besides the line were already drawn by a
         # previous call
         return ax
@@ -249,3 +250,122 @@ def rolling_mae_vs_hull_dist(
     ax.set(xlim=x_lim, ylim=(0.0, 0.14))
 
     return ax
+
+
+def precision_recall_vs_calc_count(
+    df: pd.DataFrame,
+    residual_col: str = "residual",
+    e_above_hull_col: str = "e_above_hull",
+    criterion: Literal["energy", "std", "neg_std"] = "energy",
+    stability_thresh: float = 0,  # set stability threshold as distance to convex hull
+    # in eV / atom, usually 0 or 0.1 eV
+    ax: plt.Axes = None,
+    label: str = None,
+    **kwargs: Any,
+) -> plt.Axes:
+    """Precision and recall as a function of the number of calculations performed.
+
+    Rows are ranked by ascending residual_col. The cumulative precision and recall
+    (both in percent) are then plotted against the row's rank.
+
+    Args:
+        df (pd.DataFrame): Model predictions, one row per material.
+        residual_col (str): Column used to rank rows and, after the optional
+            criterion shift, compared to stability_thresh to predict stability.
+        e_above_hull_col (str): Column compared to stability_thresh to obtain the
+            true stable/unstable label.
+        criterion ('energy' | 'std' | 'neg_std'): 'energy' uses residual_col as is.
+            'std' adds and 'neg_std' subtracts the combined aleatoric + epistemic
+            standard deviation before thresholding.
+        stability_thresh (float): Values <= this threshold count as stable.
+        ax (plt.Axes): Axes to draw on. Defaults to plt.gca().
+        label (str): Legend label for this call's curves.
+        **kwargs: Passed to every ax.plot() call for the curves, e.g. color.
+
+    Returns:
+        plt.Axes: The axes that were drawn on.
+    """
+    if ax is None:
+        ax = plt.gca()
+
+    is_fresh_ax = len(ax.lines) == 0
+
+    # rank rows by the caller-specified residual column
+    df = df.sort_values(by=residual_col)
+
+    # every criterion starts from the raw residual; std criteria shift it below
+    test = df[residual_col]
+
+    if "std" in criterion:
+        # TODO column names to compute standard deviation from are currently hardcoded
+        # needs to be updated when adding non-aviary models with uncertainty estimation
+        var_aleatoric = (df.filter(like="_ale_") ** 2).mean(axis=1)
+        var_epistemic = df.filter(regex=r"_pred_\d").var(axis=1, ddof=0)
+        std_total = (var_epistemic + var_aleatoric) ** 0.5
+
+        # build a new Series instead of += so the caller's column is not mutated
+        if criterion == "std":
+            test = test + std_total
+        elif criterion == "neg_std":
+            test = test - std_total
+
+    # classify with the caller's stability_thresh and the criterion-adjusted values
+    true_pos_mask = (df[e_above_hull_col] <= stability_thresh) & (
+        test <= stability_thresh
+    )
+    false_neg_mask = (df[e_above_hull_col] <= stability_thresh) & (
+        test > stability_thresh
+    )
+    false_pos_mask = (df[e_above_hull_col] > stability_thresh) & (
+        test <= stability_thresh
+    )
+
+    true_pos_cumsum = true_pos_mask.cumsum()
+
+    ppv = true_pos_cumsum / (true_pos_cumsum + false_pos_mask.cumsum()) * 100
+    n_true_pos = true_pos_mask.sum()
+    n_false_neg = false_neg_mask.sum()
+    n_total_pos = n_true_pos + n_false_neg
+    tpr = true_pos_cumsum / n_total_pos * 100
+
+    # truncate curves at the first row where recall reaches its maximum
+    end = int(np.argmax(tpr))
+
+    xs = np.arange(end)
+
+    precision_curve = scipy.interpolate.interp1d(xs, ppv[:end], kind="cubic")
+    rolling_recall_curve = scipy.interpolate.interp1d(xs, tpr[:end], kind="cubic")
+
+    line_kwargs = dict(
+        linewidth=3,
+        markevery=[-1],
+        marker="x",
+        markersize=14,
+        markeredgewidth=2.5,
+        **kwargs,
+    )
+    ax.plot(xs, precision_curve(xs), linestyle="-", **line_kwargs)
+    ax.plot(xs, rolling_recall_curve(xs), linestyle=":", **line_kwargs)
+    # zero-length line that only carries the legend label for this call
+    ax.plot((0, 0), (0, 0), label=label, **line_kwargs)
+
+    if not is_fresh_ax:
+        # return earlier if all plot objects besides the line were already drawn by a
+        # previous call
+        return ax
+
+    ax.set(xlabel="Number of Calculations", ylabel="Percentage")
+
+    ax.set(xlim=(0, 8e4), ylim=(0, 100))
+
+    [precision] = ax.plot((0, 0), (0, 0), "black", linestyle="-")
+    [recall] = ax.plot((0, 0), (0, 0), "black", linestyle=":")
+    legend = ax.legend(
+        [precision, recall],
+        ("Precision", "Recall"),
+        frameon=False,
+        loc="upper right",
+    )
+    ax.add_artist(legend)
+
+    return ax
diff --git a/mb_discovery/plot_scripts/precision_recall_vs_calc_count.py b/mb_discovery/plot_scripts/precision_recall_vs_calc_count.py
@@ -2,11 +2,10 @@
 from datetime import datetime
 
 import matplotlib.pyplot as plt
-import numpy as np
 import pandas as pd
-from scipy.interpolate import interp1d
 
 from mb_discovery import ROOT
+from mb_discovery.plot_scripts.plot_funcs import precision_recall_vs_calc_count
 
 
 __author__ = "Rhys Goodall, Janosh Riebesell"
@@ -31,27 +30,22 @@
         f"{ROOT}/data/2022-06-11-from-rhys/{model_name}-mp-initial-structures.csv"
     ).set_index("material_id")
 
-dfs["M3GNet"] = pd.read_json(
-    f"{ROOT}/data/2022-08-16-m3gnet-wbm-relax-results-IS2RE.json.gz"
-).set_index("material_id")
+# dfs["M3GNet"] = pd.read_json(
+#     f"{ROOT}/data/2022-08-16-m3gnet-wbm-relax-results-IS2RE.json.gz"
+# ).set_index("material_id")
 
-dfs["Wrenformer"] = pd.read_csv(
-    f"{ROOT}/data/2022-08-16-wrenformer-ensemble-predictions.csv.bz2"
-).set_index("material_id")
+# dfs["Wrenformer"] = pd.read_csv(
+#     f"{ROOT}/data/2022-08-16-wrenformer-ensemble-predictions.csv.bz2"
+# ).set_index("material_id")
 
 # dfs["Wrenformer"]["e_form_target"] = dfs["Wren"]["e_form_target"]
 # dfs["M3GNet"]["e_form_target"] = dfs["Wren"]["e_form_target"]
 
 
 # %%
-fig, ax = plt.subplots(1, 1, figsize=(10, 9))
-
-for model_name, color in zip(
-    ("Wren", "CGCNN", "Voronoi", "M3GNet", "Wrenformer"),
-    ("tab:blue", "tab:orange", "teal", "tab:pink", "black"),
-    strict=True,
+for (model_name, df), color in zip(
+    dfs.items(), ("tab:blue", "tab:orange", "teal", "tab:pink", "black")
 ):
-    df = dfs[model_name]
     df["e_above_mp_hull"] = df_hull.e_above_mp_hull
 
     assert df.e_above_mp_hull.isna().sum() == 0
@@ -88,81 +82,19 @@
         raise KeyError(f"{model_name = }") from exc
 
     df["residual"] = model_preds - targets + df.e_above_mp_hull
-    df = df.sort_values(by="residual")
-
-    # epistemic_var = df.filter(regex=r"_pred_\d").var(axis=1, ddof=0)
-
-    # aleatoric_var = (df.filter(like="_ale_") ** 2).mean(axis=1)
-
-    # std_total = (epistemic_var + aleatoric_var) ** 0.5
-
-    # criterion = "std"
-    # test = df.residual + std_total
-
-    # criterion = "neg"
-    # test = df.residual - std_total
-
-    criterion = "energy"
-
-    # stability_thresh = 0.02
-    stability_thresh = 0
-    # stability_thresh = 0.10
-
-    true_pos_mask = (df.e_above_mp_hull <= stability_thresh) & (
-        df.residual <= stability_thresh
-    )
-    false_neg_mask = (df.e_above_mp_hull <= stability_thresh) & (
-        df.residual > stability_thresh
-    )
-    false_pos_mask = (df.e_above_mp_hull > stability_thresh) & (
-        df.residual <= stability_thresh
-    )
-
-    energy_type = "pred"
-    true_pos_cumsum = true_pos_mask.cumsum()
-    xlabel = r"$\Delta E_{Hull-Pred}$ / eV per atom"
-
-    ppv = true_pos_cumsum / (true_pos_cumsum + false_pos_mask.cumsum()) * 100
-    n_true_pos = sum(true_pos_mask)
-    n_false_neg = sum(false_neg_mask)
-    n_total_pos = n_true_pos + n_false_neg
-    tpr = true_pos_cumsum / n_total_pos * 100
-
-    end = int(np.argmax(tpr))
 
-    xs = np.arange(end)
-
-    precision_curve = interp1d(xs, ppv[:end], kind="cubic")
-    rolling_recall_curve = interp1d(xs, tpr[:end], kind="cubic")
-
-    line_kwargs = dict(
-        linewidth=3,
+    ax = precision_recall_vs_calc_count(
+        df,
+        residual_col="residual",
+        e_above_hull_col="e_above_mp_hull",
         color=color,
-        markevery=[-1],
-        marker="x",
-        markersize=14,
-        markeredgewidth=2.5,
+        label=model_name,
     )
-    ax.plot(xs, precision_curve(xs), linestyle="-", **line_kwargs)
-    ax.plot(xs, rolling_recall_curve(xs), linestyle=":", **line_kwargs)
-    ax.plot((0, 0), (0, 0), label=model_name, **line_kwargs)
-
-
-ax.set(xlabel="Number of Calculations", ylabel="Percentage")
-
-ax.set(xlim=(0, 8e4), ylim=(0, 100))
 
 model_legend = ax.legend(frameon=False, loc="lower right")
 ax.add_artist(model_legend)
 
-[precision] = ax.plot((0, 0), (0, 0), "black", linestyle="-")
-[recall] = ax.plot((0, 0), (0, 0), "black", linestyle=":")
-ax.legend(
-    [precision, recall], ("Precision", "Recall"), frameon=False, loc="upper right"
-)
+ax.figure.set_size_inches(10, 9)
 
-img_path = (
-    f"{ROOT}/figures/{today}-precision-recall-vs-calc-count-"
-    f"{energy_type=}-{criterion=}-{rare=}.pdf"
-)
+img_path = f"{ROOT}/figures/{today}-precision-recall-vs-calc-count-{rare=}.pdf"
 # plt.savefig(img_path)