janosh
diff --git a/‎citation.cff
+1 b/‎citation.cff
+1
diff --git a/‎matbench_discovery/plots.py
+50-35 b/‎matbench_discovery/plots.py
+50-35
diff --git a/‎matbench_discovery/preds.py
+15 b/‎matbench_discovery/preds.py
+15
diff --git a/‎scripts/model_figs/cumulative_metrics.py
+20-12 b/‎scripts/model_figs/cumulative_metrics.py
+20-12
diff --git a/‎scripts/model_figs/hist_classified_stable_vs_hull_dist_models.py
+1-1 b/‎scripts/model_figs/hist_classified_stable_vs_hull_dist_models.py
+1-1
diff --git a/‎scripts/model_figs/make_hull_dist_box_plot.py
+10 b/‎scripts/model_figs/make_hull_dist_box_plot.py
+10
diff --git a/‎scripts/model_figs/roc_prc_curves_models.py
+1-3 b/‎scripts/model_figs/roc_prc_curves_models.py
+1-3
diff --git a/‎scripts/model_figs/rolling_mae_vs_hull_dist_models.py
+6-6 b/‎scripts/model_figs/rolling_mae_vs_hull_dist_models.py
+6-6
@@ -43,6 +43,7 @@ authors:
 affiliations:
   - Cavendish Laboratory, University of Cambridge, UK
   - Lawrence Berkeley National Laboratory, Berkeley, USA
+  - German Federal Institute of Materials Research and Testing (BAM)
 license: MIT
 license-url: https://github.com/janosh/matbench-discovery/blob/-/license"
 repository-code: https://github.com/janosh/matbench-discovery
 
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import functools
 import math
 import os
 import subprocess
@@ -21,6 +22,8 @@
 import wandb
 from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
 from pandas.io.formats.style import Styler
+from plotly.validators.scatter.line import DashValidator
+from plotly.validators.scatter.marker import SymbolValidator
 from tqdm import tqdm
 
 from matbench_discovery import STABILITY_THRESHOLD
@@ -31,15 +34,20 @@
 
 Backend = Literal["matplotlib", "plotly"]
 
+plotly_markers = SymbolValidator().values[2::3]  # noqa: PD011
+plotly_line_styles = DashValidator().values[:-1]  # noqa: PD011
+# repeat line styles as many times as needed to match number of markers
+plotly_line_styles *= len(plotly_markers) // len(plotly_line_styles)
 
-def unit(text: str) -> str:
+
+def plotly_unit(text: str) -> str:
     """Wrap text in a span with decreased font size and weight to display units in
     plotly labels.
     """
     return f"<span style='font-size: 0.8em; font-weight: lighter;'>({text})</span>"
 
 
-ev_per_atom = unit("eV/atom")
+ev_per_atom = plotly_unit("eV/atom")
 
 # --- start global plot settings
 quantity_labels = dict(
@@ -51,11 +59,11 @@ def unit(text: str) -> str:
     n_sites="Lattice site count",
     energy_per_atom=f"Energy {ev_per_atom}",
     e_form=f"DFT E<sub>form</sub> {ev_per_atom}",
-    e_above_hull=f"E<sub>above hull</sub> {ev_per_atom}",
-    e_above_hull_mp2020_corrected_ppd_mp=f"DFT E<sub>above hull</sub> {ev_per_atom}",
-    e_above_hull_pred=f"Predicted E<sub>above hull</sub> {ev_per_atom}",
+    e_above_hull=f"E<sub>hull dist</sub> {ev_per_atom}",
+    e_above_hull_mp2020_corrected_ppd_mp=f"DFT E<sub>hull dist</sub> {ev_per_atom}",
+    e_above_hull_pred=f"Predicted E<sub>hull dist</sub> {ev_per_atom}",
     e_above_hull_mp=f"E<sub>above MP hull</sub> {ev_per_atom}",
-    e_above_hull_error=f"Error in E<sub>above hull</sub> {ev_per_atom}",
+    e_above_hull_error=f"Error in E<sub>hull dist</sub> {ev_per_atom}",
     vol_diff="Volume difference (A^3)",
     e_form_per_atom_mp2020_corrected=f"DFT E<sub>form</sub> {ev_per_atom}",
     e_form_per_atom_pred=f"Predicted E<sub>form</sub> {ev_per_atom}",
@@ -547,7 +555,7 @@ def rolling_mae_vs_hull_dist(
         scatter_kwds = dict(
             fill="toself", opacity=0.2, hoverinfo="skip", showlegend=False
         )
-        triangle_anno = "MAE > |E<sub>above hull</sub>|"
+        triangle_anno = "MAE > |E<sub>hull dist</sub>|"
         fig.add_scatter(
             x=(-1, -dft_acc, dft_acc, 1) if show_dft_acc else (-1, 0, 1),
             y=(1, dft_acc, dft_acc, 1) if show_dft_acc else (1, 0, 1),
@@ -632,6 +640,7 @@ def cumulative_metrics(
     optimal_recall: str | None = "Optimal Recall",
     show_n_stable: bool = True,
     backend: Backend = "plotly",
+    n_points: int = 50,
     **kwargs: Any,
 ) -> tuple[plt.Figure | go.Figure, pd.DataFrame]:
     """Create 2 subplots side-by-side with cumulative precision and recall curves for
@@ -661,18 +670,24 @@ def cumulative_metrics(
             number of stable materials. Defaults to True.
         backend ('matplotlib' | 'plotly'], optional): Which plotting engine to use.
             Changes the return type. Defaults to 'plotly'.
+        n_points (int, optional): Number of points to use for interpolation of the
+            metric curves. Defaults to 50.
         **kwargs: Keyword arguments passed to df.plot().
 
     Returns:
         tuple[plt.Figure | go.Figure, pd.DataFrame]: The matplotlib/plotly figure and
             dataframe of cumulative metrics for each model.
     """
-    factory = lambda: pd.DataFrame(index=range(len(e_above_hull_true)))
-    dfs: dict[str, pd.DataFrame] = defaultdict(factory)
-    metrics_no_case = [*map(str.casefold, metrics)]
+    dfs: dict[str, pd.DataFrame] = defaultdict(pd.DataFrame)
+
+    # largest number of materials predicted stable by any model, determines x-axis range
+    n_max_pred_stable = (df_preds < stability_threshold).sum().max()
+    longest_xs = np.linspace(0, n_max_pred_stable - 1, n_points)
+    for metric in metrics:
+        dfs[metric].index = longest_xs
 
-    valid_metrics = {"precision", "recall", "f1", "mae", "rmse"}
-    if invalid_metrics := set(metrics_no_case) - valid_metrics:
+    valid_metrics = {"Precision", "Recall", "F1", "MAE", "RMSE"}
+    if invalid_metrics := set(metrics) - valid_metrics:
         raise ValueError(
             f"{invalid_metrics=}, should be case-insensitive subset of {valid_metrics=}"
         )
@@ -691,35 +706,36 @@ def cumulative_metrics(
         precision_cum = true_pos_cum / (true_pos_cum + false_pos_cum)
         recall_cum = true_pos_cum / n_total_pos  # aka true_pos_rate aka sensitivity
 
-        end = int(np.argmax(recall_cum))
-        xs = np.arange(end)
+        n_pred_stable = sum(each_pred <= stability_threshold)
+        model_range = np.arange(n_pred_stable)  # xs for interpolation
+        xs_model = longest_xs[longest_xs < n_pred_stable - 1]  # xs for plotting
 
-        if "precision" in metrics_no_case:
-            prec_interp = scipy.interpolate.interp1d(
-                xs, precision_cum[:end], kind="cubic"
-            )
-            dfs["Precision"][model_name] = pd.Series(prec_interp(xs))
-        if "recall" in metrics_no_case:
-            recall_interp = scipy.interpolate.interp1d(
-                xs, recall_cum[:end], kind="cubic"
-            )
-            dfs["Recall"][model_name] = pd.Series(recall_interp(xs))
-        if "f1" in metrics_no_case:
+        cubic_interpolate = functools.partial(scipy.interpolate.interp1d, kind="cubic")
+
+        if "Precision" in metrics:
+            prec_interp = cubic_interpolate(model_range, precision_cum[:n_pred_stable])
+            dfs["Precision"][model_name] = dict(zip(xs_model, prec_interp(xs_model)))
+
+        if "Recall" in metrics:
+            recall_interp = cubic_interpolate(model_range, recall_cum[:n_pred_stable])
+            dfs["Recall"][model_name] = dict(zip(xs_model, recall_interp(xs_model)))
+
+        if "F1" in metrics:
             f1_cum = 2 * (precision_cum * recall_cum) / (precision_cum + recall_cum)
-            f1_interp = scipy.interpolate.interp1d(xs, f1_cum[:end], kind="cubic")
-            dfs["F1"][model_name] = pd.Series(f1_interp(xs))
+            f1_interp = cubic_interpolate(model_range, f1_cum[:n_pred_stable])
+            dfs["F1"][model_name] = dict(zip(xs_model, f1_interp(xs_model)))
 
-        if "mae" in metrics_no_case:
+        if "MAE" in metrics:
             cum_errors = (each_true - each_pred).abs().cumsum()
             cum_counts = np.arange(1, len(each_true) + 1)
             mae_cum = cum_errors / cum_counts
-            mae_interp = scipy.interpolate.interp1d(xs, mae_cum[:end], kind="cubic")
-            dfs["MAE"][model_name] = pd.Series(mae_interp(xs))
+            mae_interp = cubic_interpolate(model_range, mae_cum[:n_pred_stable])
+            dfs["MAE"][model_name] = dict(zip(xs_model, mae_interp(xs_model)))
 
-        if "rmse" in metrics_no_case:
+        if "RMSE" in metrics:
             rmse_cum = (((each_true - each_pred) ** 2).cumsum() / cum_counts) ** 0.5
-            rmse_interp = scipy.interpolate.interp1d(xs, rmse_cum[:end], kind="cubic")
-            dfs["RMSE"][model_name] = pd.Series(rmse_interp(xs))
+            rmse_interp = cubic_interpolate(model_range, rmse_cum[:n_pred_stable])
+            dfs["RMSE"][model_name] = dict(zip(xs_model, rmse_interp(xs_model)))
 
     for key in dfs:
         # drop all-NaN rows so plotly plot x-axis only extends to largest number of
@@ -730,7 +746,6 @@ def cumulative_metrics(
 
     df_cum = pd.concat(dfs.values())
     # subselect rows for speed, plot has sufficient precision with 1k rows
-    df_cum = df_cum.iloc[:: len(df_cum) // 1000 or 1]
     n_stable = sum(e_above_hull_true <= STABILITY_THRESHOLD)
 
     if backend == "matplotlib":
@@ -751,7 +766,7 @@ def cumulative_metrics(
             # plotting speed and reduced file size
             # falls back on every row if df has less than 1000 rows
             df = dfs[metric]
-            df.iloc[:: len(df) // 1000 or 1].plot(
+            df.plot(
                 ax=ax,
                 legend=False,
                 backend=backend,
 
@@ -169,6 +169,21 @@ def load_df_wbm_with_preds(
 models = list(df_metrics.T.MAE.sort_values().index)
 
 
+# To avoid confusion for anyone reading this code, we calculate the formation energy MAE
+# here and report it as the MAE for the energy above the convex hull prediction. The
+# former is more easily calculated but the two quantities are the same. The formation
+# energy of a material is the difference in energy between a material and its
+# constituent elements in their standard states. The distance to the convex hull is
+# defined as the difference between a material's formation energy and the minimum
+# formation energy of all possible stable materials made from the same elements. Since
+# the formation energy of a material is used to calculate the distance to the convex
+# hull, the error of a formation energy prediction directly determines the error in the
+# distance to the convex hull prediction.
+
+# A further point of clarification: whenever we say convex hull distance we mean
+# the signed distance that is positive for thermodynamically unstable materials above
+# the hull and negative for stable materials below it.
+
 # dataframe of all models' energy above convex hull (EACH) predictions (eV/atom)
 df_each_pred = pd.DataFrame()
 for model in models:
 
@@ -13,10 +13,13 @@
 from pymatviz.utils import save_fig
 
 from matbench_discovery import PDF_FIGS, SITE_FIGS
-from matbench_discovery.plots import cumulative_metrics
+from matbench_discovery.plots import (
+    cumulative_metrics,
+    plotly_line_styles,
+    plotly_markers,
+)
 from matbench_discovery.preds import (
     df_each_pred,
-    df_metrics,
     df_preds,
     each_true_col,
     models,
@@ -43,16 +46,14 @@
     # facet_col_wrap=2,
     # increase facet col gap
     facet_col_spacing=0.05,
+    # markers=True,
 )
 
 x_label = "Number of screened WBM test set materials"
 if backend == "matplotlib":
     # fig.suptitle(title)
     fig.text(0.5, -0.08, x_label, ha="center", fontdict={"size": 16})
 if backend == "plotly":
-    fig.layout.legend = dict(x=1, y=0, bgcolor="rgba(0,0,0,0)", xanchor="right")
-    if "MAE" in metrics:
-        fig.layout.legend.update(traceorder="reversed")
     fig.layout.margin.update(l=0, r=0, t=30, b=50)
     fig.add_annotation(
         x=0.5,
@@ -64,14 +65,21 @@
         font=dict(size=14),
     )
     fig.update_traces(line=dict(width=3))
-    fig.layout.legend.update(
-        orientation="h", yanchor="bottom", y=1.1, xanchor="center", x=0.5
-    )
+    fig.layout.legend.update(bgcolor="rgba(0,0,0,0)")
+    # fig.layout.legend.update(
+    #     orientation="h", yanchor="bottom", y=1.1, xanchor="center", x=0.5
+    # )
+    # if "MAE" in metrics:
+    #     fig.layout.legend.update(traceorder="reversed")
+
+    for trace, ls, marker in zip(fig.data, plotly_line_styles, plotly_markers):
+        trace.line.dash = ls
+        trace.marker.symbol = marker
 
-    for trace in fig.data:
         # show only the N best models by default
-        if trace.name in df_metrics.T.sort_values("F1").index[:-6]:
-            trace.visible = "legendonly"
+        # if trace.name in df_metrics.T.sort_values("F1").index[:-6]:
+        #     trace.visible = "legendonly"
+
         last_idx = pd.Series(trace.y).last_valid_index()
         last_x = trace.x[last_idx]
         last_y = trace.y[last_idx]
@@ -113,4 +121,4 @@
 # %%
 img_name = f"cumulative-{'-'.join(metrics).lower()}"
 save_fig(fig, f"{SITE_FIGS}/{img_name}.svelte")
-save_fig(fig, f"{PDF_FIGS}/{img_name}.pdf", width=900, height=400)
+save_fig(fig, f"{PDF_FIGS}/{img_name}.pdf", width=1000, height=400)
@@ -123,4 +123,4 @@
 fig.layout.height = n_rows * 180
 save_fig(fig, f"{SITE_FIGS}/{img_name}.svelte")
 fig.layout.height = orig_height
-save_fig(fig, f"{PDF_FIGS}/{img_name}.pdf", width=n_cols * 220, height=n_rows * 100)
+save_fig(fig, f"{PDF_FIGS}/{img_name}.pdf", width=n_cols * 280, height=n_rows * 130)
@@ -69,9 +69,19 @@
     fig.add_trace(box_plot)
 
 fig.layout.legend.update(orientation="h", y=1.15)
+# prevent x-labels from rotating
+fig.layout.xaxis.tickangle = 0
+# use line breaks to offset every other x-label
+x_labels_with_offset = [
+    label if idx % 2 == 0 else f"<br>{label}" for idx, label in enumerate(models)
+]
+fig.layout.xaxis.update(tickvals=models, ticktext=x_labels_with_offset)
+
 fig.show()
 
 
 # %%
 save_fig(fig, f"{SITE_FIGS}/box-hull-dist-errors.svelte")
+fig.layout.showlegend = False
 save_fig(fig, f"{PDF_FIGS}/box-hull-dist-errors.pdf")
+fig.layout.showlegend = True
@@ -80,9 +80,7 @@
 for anno in fig.layout.annotations:
     anno.text = anno.text.split("=", 1)[1]  # remove Model= from subplot titles
 
-line_styles = "solid dash dot dashdot".split() * 3
-markers = "circle square triangle-up triangle-down diamond cross star x".split() * 2
-for trace, ls, marker in zip(fig.data, line_styles, markers):
+for trace, ls, marker in zip(fig.data, plots.plotly_line_styles, plots.plotly_markers):
     trace.line.dash = ls
     trace.marker.symbol = marker
 
 
@@ -51,23 +51,23 @@
         if model in df_metrics.T.sort_values("MAE").index[8:]:
             trace.visible = "legendonly"  # show only top models by default
 
-    # increase line width
-    fig.update_traces(line=dict(width=3))
+    fig.update_traces(line=dict(width=3))  # increase line width
     fig.layout.legend.update(
         bgcolor="rgba(0,0,0,0)", title="", x=1.01, y=0, yanchor="bottom"
     )
-    # increase legend handle size and reverse order
     fig.layout.margin.update(l=5, r=5, t=5, b=55)
 
-    # plot marginal histogram of true hull distances
+    # plot marginal histogram of true hull distances along top of figure
+    # fixes plot artifacts by adding noise to avoid piling up data in some bins
+    # from rounded data
+    noise = np.random.random(len(df_preds)) * 1e-12
     counts, bins = np.histogram(
-        df_preds[each_true_col], bins=400, range=fig.layout.xaxis.range
+        df_preds[each_true_col] + noise, bins=100, range=fig.layout.xaxis.range
     )
     marginal_trace = go.Scatter(
         x=bins, y=counts, name="Density", fill="tozeroy", showlegend=False, yaxis="y2"
     )
     marginal_trace.marker.color = "rgba(0, 150, 200, 1)"
-    # add marginal trace to existing figure
     fig.add_trace(marginal_trace)
 
     # update layout to include marginal plot