Rename plot func precision_recall_vs_calc_count() to cumulative_clf_metric(), which plots a single metric at a time

janosh · janosh · commit 86f85f36488e · 2023-06-19T20:29:21.000-07:00
diff --git a/matbench_discovery/plot_scripts/precision_recall.py b/matbench_discovery/plot_scripts/precision_recall.py
@@ -1,12 +1,13 @@
 # %%
 from datetime import datetime
 
+import matplotlib.pyplot as plt
 import pandas as pd
 from sklearn.metrics import f1_score
 
 from matbench_discovery import ROOT
 from matbench_discovery.plot_scripts import df_wbm
-from matbench_discovery.plots import StabilityCriterion, precision_recall_vs_calc_count
+from matbench_discovery.plots import StabilityCriterion, cumulative_clf_metric
 
 __author__ = "Rhys Goodall, Janosh Riebesell"
 
@@ -46,13 +47,6 @@
 F1s: dict[str, float] = {}
 
 for model_name, df in dfs.items():
-    # from pymatgen.core import Composition
-    # rare = "no-lanthanides"
-    # df["contains_rare_earths"] = df.composition.map(
-    #     lambda x: any(el.is_rare_earth_metal for el in Composition(x))
-    # )
-    # df = df.query("~contains_rare_earths")
-
     if "std" in stability_crit:
         # TODO column names to compute standard deviation from are currently hardcoded
         # needs to be updated when adding non-aviary models with uncertainty estimation
@@ -91,42 +85,47 @@
 
 
 # %%
+fig, (ax_prec, ax_recall) = plt.subplots(1, 2, figsize=(15, 7), sharey=True)
+
 for (model_name, F1), color in zip(sorted(F1s.items(), key=lambda x: x[1]), colors):
     df = dfs[model_name]
+    e_above_hull_error = df.e_above_hull_pred + df.e_above_hull_mp
+    e_above_hull_true = df.e_above_hull_mp
+    cumulative_clf_metric(
+        e_above_hull_error,
+        e_above_hull_true,
+        color=color,
+        label=f"{model_name}\n{F1=:.2}",
+        project_end_point="xy",
+        stability_crit=stability_crit,
+        ax=ax_prec,
+        metric="precision",
+    )
 
-    ax = precision_recall_vs_calc_count(
-        e_above_hull_error=df.e_above_hull_pred + df.e_above_hull_mp,
-        e_above_hull_true=df.e_above_hull_mp,
+    cumulative_clf_metric(
+        e_above_hull_error,
+        e_above_hull_true,
         color=color,
-        label=f"{model_name} {F1=:.2}",
-        intersect_lines="recall_xy",  # or "precision_xy", None, 'all'
+        label=f"{model_name}\n{F1=:.2}",
+        project_end_point="xy",
         stability_crit=stability_crit,
-        std_pred=std_total,
+        ax=ax_recall,
+        metric="recall",
     )
 
-# optimal recall line finds all stable materials without any false positives
-# can be included to confirm all models start out of with near optimal recall
-# and to see how much each model overshoots total n_stable
-n_below_hull = sum(df_wbm.e_above_hull_mp2020_corrected_ppd_mp < 0)
-ax.plot(
-    [0, n_below_hull],
-    [0, 100],
-    color="green",
-    linestyle="dashed",
-    linewidth=1,
-    label="Optimal Recall",
-)
-
-ax.figure.set_size_inches(10, 9)
-ax.set(xlim=(0, None))
-# keep this outside loop so all model names appear in legend
-ax.legend(frameon=False, loc="lower right")
+
+for ax in (ax_prec, ax_recall):
+    ax.set(xlim=(0, None))
+
 
 img_name = f"{today}-precision-recall-vs-calc-count-{rare=}"
-ax.set(title=img_name.replace("-", "/", 2).replace("-", " ").title())
 # x-ticks every 10k materials
-ax.set(xticks=range(0, int(ax.get_xlim()[1]), 10_000))
+# ax.set(xticks=range(0, int(ax.get_xlim()[1]), 10_000))
+
+fig.suptitle(f"{today} ")
+xlabel_cumulative = "Materials predicted stable sorted by hull distance"
+fig.text(0.5, -0.08, xlabel_cumulative, ha="center")
 
 
 # %%
-ax.figure.savefig(f"{ROOT}/figures/{img_name}.pdf")
+fig.savefig(f"{ROOT}/figures/{img_name}.pdf")
diff --git a/matbench_discovery/plots.py b/matbench_discovery/plots.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-from collections.abc import Sequence
 from typing import Any, Literal, get_args
 
 import matplotlib.pyplot as plt
@@ -17,6 +16,7 @@
 
 StabilityCriterion = Literal["energy", "energy+std", "energy-std"]
 WhichEnergy = Literal["true", "pred"]
+AxLine = Literal["x", "y", "xy", ""]
 
 
 # --- define global plot settings
@@ -53,6 +53,7 @@
 
 
 plt.rc("font", size=14)
+plt.rc("legend", fontsize=16)
 plt.rc("savefig", bbox="tight", dpi=200)
 plt.rc("figure", dpi=200, titlesize=16)
 plt.rcParams["figure.constrained_layout.use"] = True
@@ -282,16 +283,18 @@ def rolling_mae_vs_hull_dist(
     return ax
 
 
-def precision_recall_vs_calc_count(
+def cumulative_clf_metric(
     e_above_hull_error: pd.Series,
     e_above_hull_true: pd.Series,
+    metric: Literal["precision", "recall"],
     std_pred: pd.Series = None,
     stability_crit: StabilityCriterion = "energy",
     stability_threshold: float = 0,  # set stability threshold as distance to convex
     # hull in eV / atom, usually 0 or 0.1 eV
     ax: plt.Axes = None,
     label: str = None,
-    intersect_lines: str | Sequence[str] = (),
+    project_end_point: AxLine = "xy",
+    show_optimal: bool = False,
     **kwargs: Any,
 ) -> plt.Axes:
     """Precision and recall as a function of the number of included materials sorted
@@ -305,26 +308,27 @@ def precision_recall_vs_calc_count(
             predictions, i.e. residual = pred - target. Defaults to "residual".
         e_above_hull_true (str, optional): Column name with convex hull distance values.
             Defaults to "e_above_hull".
+        metric ('precision' | 'recall'): Metric to plot.
         stability_crit ('energy' | 'energy+std' | 'energy-std', optional): Whether to
             use energy+/-std as stability stability_crit where std is the model
             predicted uncertainty for the energy it stipulated. Defaults to "energy".
         stability_threshold (float, optional): Max distance from convex hull before
             material is considered unstable. Defaults to 0.
         label (str, optional): Model name used to identify its liens in the legend.
             Defaults to None.
-        intersect_lines (Sequence[str], optional): precision_{x,y,xy} and/or
-            recall_{x,y,xy}. Defaults to (), i.e. no intersect lines.
+        project_end_point ('x' | 'y' | 'xy' | '', optional): Defaults to 'xy', i.e.
+            guide lines from the curve's end point to both axes. '' disables them.
+        show_optimal (bool, optional): Whether to plot the optimal precision/recall
+            line. Defaults to False.
 
     Returns:
         plt.Axes: The matplotlib axes object.
     """
     ax = ax or plt.gca()
 
-    # for series in (e_above_hull_error, e_above_hull_true):
-    #     n_nans = series.isna().sum()
-    #     assert n_nans == 0, f"{n_nans:,} NaNs in {series.name}"
-
-    is_fresh_ax = len(ax.lines) == 0
+    for series in (e_above_hull_error, e_above_hull_true):
+        n_nans = series.isna().sum()
+        assert n_nans == 0, f"{n_nans:,} NaNs in {series.name}"
 
     e_above_hull_error = e_above_hull_error.sort_values()
     e_above_hull_true = e_above_hull_true.loc[e_above_hull_error.index]
@@ -338,10 +342,6 @@ def precision_recall_vs_calc_count(
     elif stability_crit == "energy-std":
         e_above_hull_error -= std_pred
 
-    # stability_threshold = 0.02
-    stability_threshold = 0
-    # stability_threshold = 0.10
-
     true_pos_mask = (e_above_hull_true <= stability_threshold) & (
         e_above_hull_error <= stability_threshold
     )
@@ -362,68 +362,56 @@ def precision_recall_vs_calc_count(
     true_pos_rate = true_pos_cumsum / n_total_pos * 100
 
     end = int(np.argmax(true_pos_rate))
-
     xs = np.arange(end)
 
-    precision_curve = scipy.interpolate.interp1d(xs, precision[:end], kind="cubic")
-    rolling_recall_curve = scipy.interpolate.interp1d(
-        xs, true_pos_rate[:end], kind="cubic"
-    )
+    ys_raw = dict(precision=precision, recall=true_pos_rate)[metric]
+    y_interp = scipy.interpolate.interp1d(xs, ys_raw[:end], kind="cubic")
+    ys = y_interp(xs)
 
     line_kwargs = dict(
-        linewidth=4,
-        markevery=[-1],
-        marker="x",
-        markersize=14,
-        markeredgewidth=2.5,
-        **kwargs,
-    )
-    ax.plot(xs, precision_curve(xs), linestyle="-", **line_kwargs)
-    ax.plot(xs, rolling_recall_curve(xs), linestyle=":", **line_kwargs)
-    ax.plot((0, 0), (0, 0), label=label, **line_kwargs)
-
-    if intersect_lines == "all":
-        intersect_lines = ("precision_xy", "recall_xy")
-    if isinstance(intersect_lines, str):
-        intersect_lines = [intersect_lines]
-    for line_name in intersect_lines:
-        try:
-            line_name_map = dict(precision=precision_curve, recall=rolling_recall_curve)
-            y_func = line_name_map[line_name.split("_")[0]]
-        except KeyError:
-            raise ValueError(
-                f"Invalid {intersect_lines=}, must be one of {list(line_name_map)}"
-            )
-        intersect_kwargs = dict(
-            linestyle=":", alpha=0.4, color=kwargs.get("color", "gray")
-        )
-        # Add some visual guidelines
-        if "x" in line_name:
-            ax.plot((0, xs[-1]), (y_func(xs[-1]), y_func(xs[-1])), **intersect_kwargs)
-        if "y" in line_name:
-            ax.plot((xs[-1], xs[-1]), (0, y_func(xs[-1])), **intersect_kwargs)
-
-    if not is_fresh_ax:
-        # return earlier if all plot objects besides the line were already drawn by a
-        # previous call
-        return ax
-
-    xlabel = "Number of compounds sorted by model-predicted hull distance"
-    ylabel = "Precision and Recall (%)"
-    ax.set(ylim=(0, 100), xlabel=xlabel, ylabel=ylabel)
-
-    [precision] = ax.plot(
-        (0, 0), (0, 0), "black", linestyle="-", linewidth=line_kwargs["linewidth"]
-    )
-    [recall] = ax.plot(
-        (0, 0), (0, 0), "black", linestyle=":", linewidth=line_kwargs["linewidth"]
+        linewidth=2, markevery=[-1], marker="x", markersize=14, markeredgewidth=2.5
     )
-    legend = ax.legend(
-        [precision, recall],
-        ("Precision", "Recall"),
-        frameon=False,
-        loc="upper right",
+    ax.plot(xs, ys, **line_kwargs | kwargs)
+    ax.text(
+        xs[-1],
+        ys[-1],
+        label,
+        color=kwargs.get("color"),
+        verticalalignment="bottom",
+        rotation=30,
+        bbox=dict(facecolor="white", alpha=0.5, edgecolor="none"),
     )
-    ax.add_artist(legend)
+
+    # add some visual guidelines
+    intersect_kwargs = dict(linestyle=":", alpha=0.4, color=kwargs.get("color"))
+    if "x" in project_end_point:
+        ax.plot((0, xs[-1]), (ys[-1], ys[-1]), **intersect_kwargs)
+    if "y" in project_end_point:
+        ax.plot((xs[-1], xs[-1]), (0, ys[-1]), **intersect_kwargs)
+
+    ax.set(ylim=(0, 100), ylabel=f"{metric.title()} (%)")
+
+    # optimal recall line finds all stable materials without any false positives
+    # can be included to confirm all models start out with near-optimal recall
+    # and to see how much each model overshoots total n_stable
+    n_below_hull = sum(e_above_hull_true < 0)
+    if show_optimal:
+        ax.plot(
+            [0, n_below_hull],
+            [0, 100],
+            color="green",
+            linestyle="dashed",
+            linewidth=1,
+            label=f"Optimal {metric.title()}",
+        )
+        ax.text(
+            n_below_hull,
+            100,
+            label,
+            color=kwargs.get("color"),
+            verticalalignment="top",
+            rotation=-30,
+            bbox=dict(facecolor="white", alpha=0.5, edgecolor="none"),
+        )
 
     return ax
diff --git a/tests/test_plots.py b/tests/test_plots.py