janosh
diff --git a/‎matbench_discovery/plots.py
+9-5 b/‎matbench_discovery/plots.py
+9-5
diff --git a/‎matbench_discovery/preds.py
+15-7 b/‎matbench_discovery/preds.py
+15-7
diff --git a/‎scripts/model_figs/cumulative_metrics.py
+15-12 b/‎scripts/model_figs/cumulative_metrics.py
+15-12
diff --git a/‎scripts/model_figs/make_hull_dist_box_plot.py
+28-15 b/‎scripts/model_figs/make_hull_dist_box_plot.py
+28-15
diff --git a/‎scripts/model_figs/model_compute_cost.py ‎scripts/model_figs/model_run_times.py
+6-3 b/‎scripts/model_figs/model_compute_cost.py ‎scripts/model_figs/model_run_times.py
+6-3
diff --git a/‎scripts/model_figs/roc_prc_curves_models.py
+14-4 b/‎scripts/model_figs/roc_prc_curves_models.py
+14-4
diff --git a/‎scripts/model_figs/rolling_mae_vs_hull_dist_models.py
+2-1 b/‎scripts/model_figs/rolling_mae_vs_hull_dist_models.py
+2-1
diff --git a/‎site/package.json
+4-4 b/‎site/package.json
+4-4
@@ -36,8 +36,10 @@
 
 plotly_markers = SymbolValidator().values[2::3]  # noqa: PD011
 plotly_line_styles = DashValidator().values[:-1]  # noqa: PD011
-# repeat line styles as many as times as needed to match number of markers
+plotly_colors = list(px.colors.qualitative.Plotly)  # copy: *= below is in-place
+# repeat line styles/colors as many times as needed to match number of markers
 plotly_line_styles *= len(plotly_markers) // len(plotly_line_styles)
+plotly_colors *= len(plotly_markers) // len(plotly_colors)
 
 
 def plotly_unit(text: str) -> str:
@@ -614,10 +616,12 @@ def rolling_mae_vs_hull_dist(
 
         line_styles = "solid dash dot dashdot".split()
         markers = "circle square triangle-up triangle-down diamond cross star x".split()
-        combinations = [(ls, mark) for mark in markers for ls in line_styles]
-        for idx, trace in enumerate(fig.data):
-            ls, marker = combinations[idx % len(combinations)]
-            trace.line.dash = ls
+        from matbench_discovery.preds import model_styles
+
+        for trace in fig.data:
+            if style := model_styles.get(trace.name):
+                ls, _marker, color = style
+                trace.line = dict(color=color, dash=ls, width=2)
             # marker_spacing = 2
             # trace = go.Scatter(
             #     x=trace.x[::marker_spacing],
 
@@ -9,7 +9,14 @@
 from matbench_discovery import ROOT, STABILITY_THRESHOLD
 from matbench_discovery.data import Files, df_wbm, glob_to_df
 from matbench_discovery.metrics import stable_metrics
-from matbench_discovery.plots import ev_per_atom, model_labels, quantity_labels
+from matbench_discovery.plots import (
+    ev_per_atom,
+    model_labels,
+    plotly_colors,
+    plotly_line_styles,
+    plotly_markers,
+    quantity_labels,
+)
 
 """Centralize data-loading and computing metrics for plotting scripts"""
 
@@ -52,11 +59,11 @@ class PredFiles(Files):
 
     # original MEGNet straight from publication, not re-trained
     megnet = "megnet/2022-11-18-megnet-wbm-IS2RE.csv.gz"
-    # CHGNet-relaxed structures fed into MEGNet for formation energy prediction
-    chgnet_megnet = "chgnet/2023-03-04-chgnet-wbm-IS2RE.csv.gz"
-    # M3GNet-relaxed structures fed into MEGNet for formation energy prediction
-    m3gnet_megnet = "m3gnet/2022-10-31-m3gnet-wbm-IS2RE.csv.gz"
-    megnet_rs2re = "megnet/2023-08-23-megnet-wbm-RS2RE.csv.gz"
+    # # CHGNet-relaxed structures fed into MEGNet for formation energy prediction
+    # chgnet_megnet = "chgnet/2023-03-04-chgnet-wbm-IS2RE.csv.gz"
+    # # M3GNet-relaxed structures fed into MEGNet for formation energy prediction
+    # m3gnet_megnet = "m3gnet/2022-10-31-m3gnet-wbm-IS2RE.csv.gz"
+    # megnet_rs2re = "megnet/2023-08-23-megnet-wbm-RS2RE.csv.gz"
 
     # Magpie composition+Voronoi tessellation structure features + sklearn random forest
     voronoi_rf = "voronoi/2022-11-27-train-test/e-form-preds-IS2RE.csv.gz"
@@ -172,7 +179,8 @@ def load_df_wbm_with_preds(
 df_metrics_10k = df_metrics_10k.round(3).sort_values("F1", axis=1, ascending=False)
 
 models = list(df_metrics.T.MAE.sort_values().index)
-
+# used for consistent markers, line styles and colors for a given model across plots
+model_styles = dict(zip(models, zip(plotly_line_styles, plotly_markers, plotly_colors)))
 
 # To avoid confusion for anyone reading this code, we calculate the formation energy MAE
 # here and report it as the MAE for the energy above the convex hull prediction. The
 
@@ -13,15 +13,12 @@
 from pymatviz.utils import save_fig
 
 from matbench_discovery import PDF_FIGS, SITE_FIGS
-from matbench_discovery.plots import (
-    cumulative_metrics,
-    plotly_line_styles,
-    plotly_markers,
-)
+from matbench_discovery.plots import cumulative_metrics
 from matbench_discovery.preds import (
     df_each_pred,
     df_preds,
     each_true_col,
+    model_styles,
     models,
 )
 
@@ -30,18 +27,17 @@
 
 
 # %%
-metrics = ("Precision", "Recall")
-# metrics = ("MAE", "RMSE")
+# metrics = ("Precision", "Recall")
+metrics = ("MAE", "RMSE")
 range_y = {
-    ("MAE", "RMSE"): (0, 0.5),
+    ("MAE", "RMSE"): (0, 0.7),
     ("Precision", "Recall"): (0, 1),
 }[metrics]
 fig, df_metric = cumulative_metrics(
     e_above_hull_true=df_preds[each_true_col],
     df_preds=df_each_pred[models],
     project_end_point="xy",
     backend=(backend := "plotly"),
-    range_y=range_y,
     metrics=metrics,
     # facet_col_wrap=2,
     # increase facet col gap
@@ -54,6 +50,9 @@
     # fig.suptitle(title)
     fig.text(0.5, -0.08, x_label, ha="center", fontdict={"size": 16})
 if backend == "plotly":
+    for key in filter(lambda key: key.startswith("yaxis"), fig.layout):
+        fig.layout[key].range = range_y
+
     fig.layout.margin.update(l=0, r=0, t=30, b=50)
     fig.add_annotation(
         x=0.5,
@@ -71,10 +70,14 @@
     # )
     # if "MAE" in metrics:
     #     fig.layout.legend.update(traceorder="reversed")
+    assert len(metrics) * len(models) == len(
+        fig.data
+    ), f"expected one trace per model per metric, got {len(fig.data)}"
 
-    for trace, ls, marker in zip(fig.data, plotly_line_styles, plotly_markers):
-        trace.line.dash = ls
-        trace.marker.symbol = marker
+    for trace in fig.data:
+        if line_style := model_styles.get(trace.name):
+            ls, _marker, color = line_style
+            trace.line = dict(color=color, dash=ls, width=2)
 
         # show only the N best models by default
         # if trace.name in df_metrics.T.sort_values("F1").index[:-6]:
 
@@ -33,6 +33,11 @@
 )
 ax.set(ylim=(-0.9, 0.9))
 
+for idx, label in enumerate(ax.get_xticklabels()):
+    label.set_va("bottom" if idx % 2 else "top")
+    # lower all labels
+    label.set_y(label.get_position()[1] - 0.05)
+
 
 # %%
 px.violin(
@@ -54,29 +59,37 @@
 fig.layout.yaxis.title = plots.quantity_labels["e_above_hull_error"]
 fig.layout.margin = dict(l=0, r=0, b=0, t=0)
 
-for col in models:
-    val_min = df_each_err[col].quantile(0.05)
-    lower_box = df_each_err[col].quantile(0.25)
-    median = df_each_err[col].median()
-    upper_box = df_each_err[col].quantile(0.75)
-    val_max = df_each_err[col].quantile(0.95)
-
-    box_plot = go.Box(
-        y=[val_min, lower_box, median, upper_box, val_max],
-        name=col,
-        width=0.7,
-    )
+for idx, model in enumerate(models):
+    ys = [df_each_err[model].quantile(quant) for quant in (0.05, 0.25, 0.5, 0.75, 0.95)]
+
+    box_plot = go.Box(y=ys, name=model, width=0.7)
     fig.add_trace(box_plot)
 
-fig.layout.legend.update(orientation="h", y=1.15)
+    # Add an annotation for the interquartile range
+    IQR = ys[3] - ys[1]
+    median = ys[2]
+    fig.add_annotation(
+        x=idx, y=1, text=f"{IQR:.2}", showarrow=False, yref="paper", yshift=-10
+    )
+    fig.add_annotation(
+        x=idx,
+        y=median,
+        text=f"{median:.2}",
+        showarrow=False,
+        yshift=7,
+        # bgcolor="rgba(0, 0, 0, 0.2)",
+        # width=50,
+    )
+fig.add_annotation(x=-0.6, y=1, text="IQR", showarrow=False, yref="paper", yshift=-10)
+
+fig.layout.legend.update(orientation="h", y=1.2)
 # prevent x-labels from rotating
 fig.layout.xaxis.tickangle = 0
 # use line breaks to offset every other x-label
 x_labels_with_offset = [
-    label if idx % 2 == 0 else f"<br>{label}" for idx, label in enumerate(models)
+    f"{'<br>' * (idx % 2)}{label}" for idx, label in enumerate(models)
 ]
 fig.layout.xaxis.update(tickvals=models, ticktext=x_labels_with_offset)
-
 fig.show()
 
 
 
@@ -19,7 +19,7 @@
 from tqdm import tqdm
 
 from matbench_discovery import PDF_FIGS, SITE_FIGS, SITE_MODELS, WANDB_PATH
-from matbench_discovery.preds import df_metrics, df_metrics_10k, df_preds
+from matbench_discovery.preds import df_metrics, df_metrics_10k, df_preds, model_styles
 
 __author__ = "Janosh Riebesell"
 __date__ = "2022-11-28"
@@ -194,6 +194,7 @@
     text_auto=".0f",
     text=time_col,
     color=model_col,
+    color_discrete_map={model: model_styles[model][2] for model in df_melt[model_col]},
 )
 # reduce bar width
 fig.update_traces(width=0.8)
@@ -202,8 +203,11 @@
 fig.layout.legend.update(title=title, orientation="h", xanchor="center", x=0.4, y=1.2)
 fig.layout.xaxis.title = ""
 fig.layout.margin.update(l=0, r=0, t=0, b=0)
-save_fig(fig, f"{SITE_FIGS}/model-run-times-bar.svelte")
+fig.show()
+
 
+# %%
+save_fig(fig, f"{SITE_FIGS}/model-run-times-bar.svelte")
 pdf_fig = go.Figure(fig)
 # replace legend with annotation in PDF
 pdf_fig.layout.showlegend = False
@@ -217,4 +221,3 @@
     yref="paper",
 )
 save_fig(pdf_fig, f"{PDF_FIGS}/model-run-times-bar.pdf", height=300, width=800)
-fig.show()
 
@@ -14,7 +14,13 @@
 
 from matbench_discovery import PDF_FIGS, SITE_FIGS, STABILITY_THRESHOLD
 from matbench_discovery import plots as plots
-from matbench_discovery.preds import df_each_pred, df_preds, each_true_col, models
+from matbench_discovery.preds import (
+    df_each_pred,
+    df_preds,
+    each_true_col,
+    model_styles,
+    models,
+)
 
 __author__ = "Janosh Riebesell"
 __date__ = "2023-01-30"
@@ -80,9 +86,13 @@
 for anno in fig.layout.annotations:
     anno.text = anno.text.split("=", 1)[1]  # remove Model= from subplot titles
 
-for trace, ls, marker in zip(fig.data, plots.plotly_line_styles, plots.plotly_markers):
-    trace.line.dash = ls
-    trace.marker.symbol = marker
+
+for trace in fig.data:
+    if styles := model_styles.get(trace.name.split(" · ")[0]):
+        ls, marker, color = styles
+        trace.line = dict(color=color, dash=ls, width=2)
+        trace.marker = dict(color=color, symbol=marker, size=4)
+
 
 if not facet_plot:
     fig.layout.legend.update(x=1, y=0, xanchor="right", title=None)
 
@@ -46,9 +46,10 @@
     for line in fig.lines:
         line._linewidth *= 2
 else:
+    show_n_best_models = len(models)
     for trace in fig.data:
         model = trace.name.split(" MAE=")[0]
-        if model in df_metrics.T.sort_values("MAE").index[8:]:
+        if model in df_metrics.T.sort_values("MAE").index[show_n_best_models:]:
             trace.visible = "legendonly"  # show only top models by default
 
     fig.update_traces(line=dict(width=3))  # increase line width
 
@@ -20,14 +20,14 @@
     "@iconify/svelte": "^3.1.4",
     "@rollup/plugin-yaml": "^4.1.1",
     "@sveltejs/adapter-static": "^2.0.3",
-    "@sveltejs/kit": "^1.22.6",
+    "@sveltejs/kit": "^1.23.0",
     "@sveltejs/vite-plugin-svelte": "^2.4.5",
     "@typescript-eslint/eslint-plugin": "^6.4.1",
     "@typescript-eslint/parser": "^6.4.1",
     "d3-scale-chromatic": "^3.0.0",
     "elementari": "^0.2.2",
-    "eslint": "^8.47.0",
-    "eslint-plugin-svelte": "^2.32.4",
+    "eslint": "^8.48.0",
+    "eslint-plugin-svelte": "^2.33.0",
     "hastscript": "^8.0.0",
     "highlight.js": "^11.8.0",
     "js-yaml": "^4.1.0",
@@ -47,7 +47,7 @@
     "svelte-zoo": "^0.4.9",
     "svelte2tsx": "^0.6.20",
     "tslib": "^2.6.2",
-    "typescript": "5.1.6",
+    "typescript": "5.2.2",
     "vite": "^4.4.9"
   },
   "prettier": {