fix scripts/hist_classified_stable_vs_hull_dist_models.py

janosh · janosh · commit 4ae422d71c54 · 2023-06-19T20:29:22.000-07:00
The script was overlaying instead of stacking the histograms in the same plot, and it failed for backend=plotly.
diff --git a/data/wbm/analysis.py b/data/wbm/analysis.py
@@ -3,9 +3,9 @@
 
 import pandas as pd
 from pymatviz import count_elements, ptable_heatmap_plotly
+from pymatviz.utils import save_fig
 
 from matbench_discovery import ROOT, today
-from matbench_discovery.plots import write_html
 
 module_dir = os.path.dirname(__file__)
 
@@ -47,7 +47,7 @@
 
 # %%
 fig.write_image(f"{module_dir}/{today}-wbm-elements.svg", width=1000, height=500)
-write_html(fig, f"{module_dir}/{today}-wbm-elements.svelte")
+save_fig(fig, f"{module_dir}/{today}-wbm-elements.svelte")
 
 
 # %% load MP training set
@@ -82,4 +82,4 @@
 
 # %%
 fig.write_image(f"{module_dir}/{today}-mp-elements.svg", width=1000, height=500)
-write_html(fig, f"{module_dir}/{today}-mp-elements.svelte")
+save_fig(fig, f"{module_dir}/{today}-mp-elements.svelte")
diff --git a/data/wbm/fetch_process_wbm_dataset.py b/data/wbm/fetch_process_wbm_dataset.py
@@ -18,6 +18,7 @@
 )
 from pymatgen.entries.computed_entries import ComputedStructureEntry
 from pymatviz import density_scatter
+from pymatviz.utils import save_fig
 from tqdm import tqdm
 
 from matbench_discovery import ROOT, today
@@ -448,14 +449,10 @@ def fix_bad_struct_index_mismatch(material_id: str) -> str:
 # no need to store all 250k x values in plot, leads to 1.7 MB file, subsample every 10th
 # point is enough to see the distribution
 fig.data[0].x = fig.data[0].x[::10]
-# recommended to upload to vecta.io/nano afterwards for compression
-fig.write_image(f"{module_dir}/{today}-hist-e-form-per-atom.svg", width=800, height=300)
-fig.write_html(
-    f"{module_dir}/{today}-hist-e-form-per-atom.svelte",
-    include_plotlyjs=False,
-    full_html=False,
-    config=dict(showTips=False, displayModeBar=False, scrollZoom=True),
-)
+# recommended to upload SVG to vecta.io/nano afterwards for compression
+img_path = f"{module_dir}/{today}-hist-e-form-per-atom"
+save_fig(fig, f"{img_path}.svg", width=800, height=300)
+save_fig(fig, f"{img_path}.svelte")
 
 
 # %%
diff --git a/matbench_discovery/plots.py b/matbench_discovery/plots.py
@@ -221,7 +221,8 @@ def hist_classified_stable_vs_hull_dist(
             color="clf",
             nbins=20000,
             range_x=x_lim,
-            opacity=0.9,
+            barmode="stack",
+            color_discrete_map=dict(zip(labels, px.colors.qualitative.Pastel)),
             **kwargs,
         )
         ax.update_layout(
@@ -638,26 +639,3 @@ def wandb_scatter(table: wandb.Table, fields: dict[str, str], **kwargs: Any) ->
     )
 
     wandb.log({"true_pred_scatter": scatter_plot})
-
-
-def write_html(fig: go.Figure, path: str, **kwargs: Any) -> None:
-    """Write a plotly figure to an HTML file. If the file is has .svelte extension,
-    insert `{...$$props}` into the figure's top-level div so it can be styled by
-    consuming Svelte code
-
-    Args:
-        fig (go.Figure): Plotly figure.
-        path (str): Path to HTML file that will be created.
-        **kwargs: Keyword arguments passed to fig.write_html().
-    """
-    config = dict(
-        showTips=False, displayModeBar=False, scrollZoom=True, responsive=True
-    )
-    fig.write_html(
-        path, include_plotlyjs=False, full_html=False, config=config, **kwargs
-    )
-    if path.lower().endswith(".svelte"):
-        # insert {...$$props} into top-level div to be able to post-process and style
-        # plotly figures from within Svelte files
-        text = open(path).read().replace("<div>", "<div {...$$props}>", 1)
-        open(path, "w").write(text)
diff --git a/scripts/cumulative_clf_metrics.py b/scripts/cumulative_clf_metrics.py
@@ -1,9 +1,10 @@
 # %%
 import pandas as pd
+from pymatviz.utils import save_fig
 
 from matbench_discovery import FIGS, today
 from matbench_discovery.data import load_df_wbm_with_preds
-from matbench_discovery.plots import cumulative_precision_recall, write_html
+from matbench_discovery.plots import cumulative_precision_recall
 
 __author__ = "Janosh Riebesell, Rhys Goodall"
 __date__ = "2022-12-04"
@@ -12,7 +13,7 @@
 # %%
 models = (
     # Wren, CGCNN IS2RE, CGCNN RS2RE
-    "Voronoi RF, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet"
+    "Voronoi RF, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet, CGCNN, CGCNN debug"
 ).split(", ")
 
 df_wbm = load_df_wbm_with_preds(models=models).round(3)
@@ -50,16 +51,12 @@
 
 
 # %%
-img_path = f"{FIGS}/{today}-cumulative-clf-metrics"
-
 # file will be served by site
 # so we round y floats to reduce file size since
 for trace in fig.data:
     assert isinstance(trace.y[0], float)
     trace.y = [round(y, 3) for y in trace.y]
 
-if hasattr(fig, "write_image"):
-    fig.write_image(f"{img_path}.pdf")
-    write_html(fig, f"{img_path}.svelte")
-else:
-    fig.savefig(f"{img_path}.pdf")
+img_path = f"{FIGS}/{today}-cumulative-clf-metrics"
+# save_fig(fig, f"{img_path}.pdf")
+save_fig(fig, f"{img_path}.svelte")
diff --git a/scripts/hist_classified_stable_vs_hull_dist.py b/scripts/hist_classified_stable_vs_hull_dist.py
@@ -1,4 +1,6 @@
 # %%
+from pymatviz.utils import save_fig
+
 from matbench_discovery import FIGS, today
 from matbench_discovery.data import load_df_wbm_with_preds
 from matbench_discovery.plots import WhichEnergy, hist_classified_stable_vs_hull_dist
@@ -60,9 +62,6 @@
 
 
 # %%
-img_path = f"{FIGS}/{today}-wren-wbm-hull-dist-hist-{which_energy=}.pdf"
-if hasattr(ax, "write_image"):
-    # fig.write_image(img_path)
-    ax.write_html(img_path.replace(".pdf", ".html"))
-else:
-    ax.figure.savefig(img_path)
+img_path = f"{FIGS}/{today}-wren-wbm-hull-dist-hist-{which_energy=}"
+# save_fig(ax, f"{img_path}.pdf")
+save_fig(ax, f"{img_path}.html")
diff --git a/scripts/hist_classified_stable_vs_hull_dist_batches.py b/scripts/hist_classified_stable_vs_hull_dist_batches.py
@@ -1,4 +1,6 @@
 # %%
+from pymatviz.utils import save_fig
+
 from matbench_discovery import FIGS, today
 from matbench_discovery.data import load_df_wbm_with_preds
 from matbench_discovery.plots import (
@@ -70,4 +72,4 @@
 
 # %%
 img_path = f"{FIGS}/{today}-{model_name}-wbm-hull-dist-hist-batches.pdf"
-# ax.figure.savefig(img_path)
+save_fig(ax, img_path)
diff --git a/scripts/hist_classified_stable_vs_hull_dist_models.py b/scripts/hist_classified_stable_vs_hull_dist_models.py
@@ -1,5 +1,6 @@
 # %%
 from plotly.subplots import make_subplots
+from pymatviz.utils import save_fig
 
 from matbench_discovery import FIGS, today
 from matbench_discovery.data import load_df_wbm_with_preds
@@ -21,10 +22,9 @@
 
 
 # %%
-models = (
-    "Wren, CGCNN, CGCNN IS2RE, CGCNN RS2RE, Voronoi RF, "
-    "Wrenformer, MEGNet, M3GNet, BOWSR MEGNet"
-).split(", ")
+models = sorted(
+    "CGCNN, Voronoi RF, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ")
+)
 df_wbm = load_df_wbm_with_preds(models=models).round(3)
 
 target_col = "e_form_per_atom_mp2020_corrected"
@@ -35,11 +35,15 @@
 which_energy: WhichEnergy = "true"
 model_name = "Wrenformer"
 
-backend: Backend = "matplotlib"
+backend: Backend = "plotly"
+rows, cols = len(models) // 3, 3
 if backend == "matplotlib":
-    fig, axs = plt.subplots(nrows=3, ncols=3, figsize=(18, 12))
+    fig, axs = plt.subplots(nrows=rows, ncols=cols, figsize=(6 * cols, 5 * rows))
 else:
-    fig = make_subplots(rows=3, cols=3)
+    x_title = "distance to convex hull (eV/atom)"
+    fig = make_subplots(
+        rows=rows, cols=cols, y_title="Count", x_title=x_title, subplot_titles=models
+    )
 
 
 for idx, model_name in enumerate(models):
@@ -48,22 +52,27 @@
         e_above_hull_pred=df_wbm[e_above_hull_col]
         + (df_wbm[model_name] - df_wbm[target_col]),
         which_energy=which_energy,
-        ax=axs.flat[idx],
+        ax=axs.flat[idx] if backend == "matplotlib" else None,
         backend=backend,
     )
     title = f"{model_name} ({len(df_wbm[model_name].dropna()):,})"
     text = f"Enrichment\nFactor = {metrics['enrichment']:.3}"
+    row, col = idx // cols + 1, idx % cols + 1  # row-major like subplot_titles
 
     if backend == "matplotlib":
         ax.text(0.02, 0.25, text, fontsize=16, transform=ax.transAxes)
         ax.set(title=title)
 
     else:
-        ax.add_annotation(text=text, x=0.5, y=0.5, showarrow=False)
-        ax.update_xaxes(title_text=title)
+        # plotly only: no need to store all 250k x values in the plot (leads to a
+        # 1.7 MB file), every 10th point is enough to see the distribution
+        for trace in ax.data:
+            trace.x = trace.x[::10]
+
+        # attach the annotation to this model's subplot, not always the first one
+        fig.add_annotation(text=text, x=0.5, y=0.5, showarrow=False, row=row, col=col)
+        fig.add_traces(ax.data, rows=row, cols=col)
 
-        for trace in ax.data:
-            fig.append_trace(trace, row=idx % 3 + 1, col=idx // 3 + 1)
 
 if backend == "matplotlib":
     fig.suptitle(f"{today} {which_energy=}", y=1.07, fontsize=16)
@@ -74,14 +83,16 @@
         bbox_to_anchor=(0.5, -0.05),
         frameon=False,
     )
+else:
+    fig.update_xaxes(range=[-0.4, 0.4])
+    fig.update_layout(showlegend=False, barmode="stack")
+
 
-fig.show()
+fig.show(config=dict(responsive=True))
 
 
 # %%
 img_path = f"{FIGS}/{today}-wbm-hull-dist-hist-models"
-# if hasattr(fig, "write_image"):
-#     fig.write_image(f"{img_path}.pdf")
-#     fig.write_html(f"{img_path}.html", include_ploltyjs="cdn")
-# else:
-#     fig.savefig(f"{img_path}.pdf")
+save_fig(fig, f"{img_path}.html")
+# save_fig(fig, f"{img_path}.png", scale=3)
+# save_fig(fig, f"{img_path}.pdf")
diff --git a/scripts/make_api_docs.py b/scripts/make_api_docs.py
@@ -3,7 +3,9 @@
 from glob import glob
 from subprocess import run
 
-# update generated API docs on production builds
+# Update auto-generated API docs. Also tweak lazydocs's markdown output for
+# - prettier badges linking to source code on GitHub
+# - remove bold tags since they break inline code
 
 pkg = json.load(open("site/package.json"))
 route = "site/src/routes/api"
diff --git a/scripts/rolling_mae_vs_hull_dist_all_models.py b/scripts/rolling_mae_vs_hull_dist_all_models.py
@@ -0,0 +1,67 @@
+# %%
+from plotly.subplots import make_subplots
+from pymatviz.utils import save_fig
+
+from matbench_discovery import FIGS, today
+from matbench_discovery.data import load_df_wbm_with_preds
+from matbench_discovery.plots import Backend, rolling_mae_vs_hull_dist
+
+__author__ = "Rhys Goodall, Janosh Riebesell"
+__date__ = "2022-06-18"
+
+
+# %%
+models = sorted(
+    "Wrenformer, CGCNN, Voronoi RF, MEGNet, M3GNet, BOWSR MEGNet".split(", ")
+)
+
+df_wbm = load_df_wbm_with_preds(models=models).round(3)
+
+
+# %%
+target_col = "e_form_per_atom_mp2020_corrected"
+e_above_hull_col = "e_above_hull_mp2020_corrected_ppd_mp"
+backend: Backend = "plotly"
+
+# one subplot per model on a 3-column grid; round the row count up so a model
+# count that is not a multiple of 3 still gets a subplot for every model
+cols = 3
+rows = (len(models) + cols - 1) // cols
+if backend == "plotly":
+    fig = make_subplots(rows=rows, cols=cols)
+
+
+for idx, model_name in enumerate(models):
+    # fill the grid column by column (plotly subplot indices are 1-based)
+    row, col = idx % rows + 1, idx // rows + 1
+
+    # assert df_wbm[model_name].isna().sum() < 100
+    # NOTE(review): preds is target minus model column (a signed error), yet MAE
+    # subtracts it from the hull distance instead of using preds.abs() - confirm
+    preds = df_wbm[target_col] - df_wbm[model_name]
+    MAE = (df_wbm[e_above_hull_col] - preds).abs().mean()
+
+    ax = rolling_mae_vs_hull_dist(
+        e_above_hull_true=df_wbm[e_above_hull_col],
+        e_above_hull_error=preds,
+        label=f"{model_name} · {MAE=:.2f}",
+        backend=backend,
+    )
+    if backend == "plotly":
+        # add_traces takes the plural keywords rows/cols (row/col raise TypeError)
+        fig.add_traces(ax.data, rows=row, cols=col)
+
+if hasattr(ax, "legend"):
+    # increase line width in legend
+    legend = ax.legend(frameon=False, loc="lower right")
+    ax.figure.set_size_inches(10, 9)
+    for line in legend.get_lines():
+        line._linewidth *= 3
+
+
+fig.show()
+
+
+# %%
+img_path = f"{FIGS}/{today}-rolling-mae-vs-hull-dist-compare-models"
+save_fig(fig, f"{img_path}.pdf")
diff --git a/site/package.json b/site/package.json
@@ -13,7 +13,7 @@
     "preview": "vite preview",
     "serve": "vite build && vite preview",
     "check": "svelte-check",
-    "make-api-docs": "python ../scripts/make_api_docs.py"
+    "make-api-docs": "cd .. && python scripts/make_api_docs.py"
   },
   "devDependencies": {
     "@iconify/svelte": "^3.0.1",
@@ -25,16 +25,13 @@
     "@typescript-eslint/parser": "^5.48.1",
     "eslint": "^8.31.0",
     "eslint-plugin-svelte3": "^4.0.0",
-    "hast-util-from-string": "^2.0.0",
-    "hast-util-select": "^5.0.3",
-    "hast-util-to-string": "^2.0.0",
     "hastscript": "^7.2.0",
-    "highlight.js": "^11.7.0",
     "katex": "^0.16.4",
     "mdsvex": "^0.10.6",
     "prettier": "^2.8.2",
     "prettier-plugin-svelte": "^2.9.0",
     "rehype-autolink-headings": "^6.1.1",
+    "rehype-katex-svelte": "^1.1.2",
     "rehype-slug": "^5.1.0",
     "remark-math": "3.0.0",
     "svelte": "^3.55.1",
diff --git a/site/src/app.html b/site/src/app.html
@@ -30,7 +30,7 @@
     <!-- math display -->
     <link
       rel="stylesheet"
-      href="https://cdn.jsdelivr.net/npm/katex@0.15.0/dist/katex.min.css"
+      href="https://cdn.jsdelivr.net/npm/katex@latest/dist/katex.min.css"
     />
 
     %sveltekit.head%
diff --git a/site/svelte.config.js b/site/svelte.config.js
@@ -1,7 +1,4 @@
 import adapter from '@sveltejs/adapter-static'
-import { fromString } from 'hast-util-from-string'
-import { selectAll } from 'hast-util-select'
-import { toString } from 'hast-util-to-string'
 import { s } from 'hastscript'
-import katex from 'katex'
+import katex from 'rehype-katex-svelte'
 import { mdsvex } from 'mdsvex'
@@ -11,18 +8,7 @@ import math from 'remark-math'
 import preprocess from 'svelte-preprocess'
 
 const rehypePlugins = [
-  // from https://github.com/kwshi/rehype-katex-svelte
-  (options = {}) =>
-    (tree) => {
-      for (const node of selectAll(`.math-inline,.math-display`, tree)) {
-        const displayMode = node.properties?.className?.includes(`math-display`)
-        const rendered = katex.renderToString(toString(node), {
-          ...options,
-          displayMode,
-        })
-        fromString(node, `{@html ${JSON.stringify(rendered)}}`)
-      }
-    },
+  katex,
   heading_slugs,
   [
     link_headings,
diff --git a/tests/test_plots.py b/tests/test_plots.py