janosh
diff --git a/data/wbm/analysis.py b/data/wbm/analysis.py
(21 additions, 28 deletions)
diff --git a/data/wbm/figs/2022-12-07-hist-wbm-e-form-per-atom.svg b/data/wbm/figs/hist-wbm-e-form-per-atom.svg
diff --git a/data/wbm/figs/2023-01-08-mp-elements.svg b/data/wbm/figs/mp-elements.svg
diff --git a/data/wbm/figs/wbm-each-hist.svg b/data/wbm/figs/wbm-each-hist.svg
(1 addition)
diff --git a/data/wbm/figs/2023-01-08-wbm-elements.svg b/data/wbm/figs/wbm-elements.svg
diff --git a/data/wbm/readme.md b/data/wbm/readme.md
(5 additions, 5 deletions)
diff --git a/matbench_discovery/__init__.py b/matbench_discovery/__init__.py
(3 additions, 2 deletions)
diff --git a/matbench_discovery/data.py b/matbench_discovery/data.py
(2 additions, 2 deletions)
@@ -1,6 +1,7 @@
 # %%
 import os
 
+import numpy as np
 import pandas as pd
 from pymatviz import count_elements, ptable_heatmap_plotly
 from pymatviz.utils import save_fig
@@ -16,13 +17,13 @@
 
 module_dir = os.path.dirname(__file__)
 print(f"{pio.templates.default=}")
+about_data_page = f"{ROOT}/site/src/routes/about-the-test-set"
 
 
 # %%
 wbm_elem_counts = count_elements(df_wbm.formula).astype(int)
 
-out_elem_counts = f"{ROOT}/site/src/routes/about-the-test-set/wbm-element-counts.json"
-# wbm_elem_counts.to_json(out_elem_counts)
+# wbm_elem_counts.to_json(f"{about_data_page}/wbm-element-counts.json")
 
 
 # %%
@@ -46,17 +47,14 @@
 
 # %%
 wbm_fig.write_image(f"{module_dir}/figs/wbm-elements.svg", width=1000, height=500)
-save_fig(wbm_fig, f"{FIGS}/{today}-wbm-elements.svelte")
+# save_fig(wbm_fig, f"{FIGS}/wbm-elements.svelte")
 
 
 # %% load MP training set
 df = pd.read_json(f"{module_dir}/../mp/2022-08-13-mp-energies.json.gz")
 mp_elem_counts = count_elements(df.formula_pretty).astype(int)
 
-# mp_elem_counts.to_json(
-#     f"{ROOT}/site/src/routes/about-the-test-set/{today}-mp-element-counts.json"
-# )
-mp_elem_counts.describe()
+# mp_elem_counts.to_json(f"{about_data_page}/mp-element-counts.json")
 
 
 # %%
@@ -80,20 +78,20 @@
 
 # %%
 mp_fig.write_image(f"{module_dir}/figs/{today}-mp-elements.svg", width=1000, height=500)
-# save_fig(mp_fig, f"{FIGS}/{today}-mp-elements.svelte")
+# save_fig(mp_fig, f"{FIGS}/mp-elements.svelte")
 
 
 # %% histogram of energy above MP convex hull for WBM
 col = "e_above_hull_mp2020_corrected_ppd_mp"
 # col = "e_form_per_atom_mp2020_corrected"
 mean, std = df_wbm[col].mean(), df_wbm[col].std()
 
-fig = df_wbm[col].hist(
-    bins=100,
-    backend="plotly",
-    range_x=[mean - 2 * std, mean + 2 * std],
-    template="plotly_dark",
-)
+range_x = (mean - 2 * std, mean + 2 * std)
+counts, bins = np.histogram(df_wbm[col], bins=150, range=range_x)
+x_label = "WBM energy above MP convex hull (eV/atom)"
+df_hist = pd.DataFrame([counts, bins], index=["count", x_label]).T
+
+fig = df_hist.plot.area(x=x_label, y="count", backend="plotly", range_x=range_x)
 
 if col.startswith("e_above_hull"):
     n_stable = sum(df_wbm[col] <= 0)
@@ -108,25 +106,20 @@
     )
     fig.update_layout(title=dict(text=title, x=0.5, y=0.95))
 
-fig.update_layout(showlegend=False, paper_bgcolor="rgba(0,0,0,0)")
-fig.update_xaxes(title="WBM energy above MP convex hull (eV/atom)")
+fig.update_layout(showlegend=False)
 
-for x_pos, label in zip(
-    [mean, mean + std, mean - std],
-    [f"{mean = :.2f}", f"{mean + std = :.2f}", f"{mean - std = :.2f}"],
+for x_pos, label in (
+    (mean, f"{mean = :.2f}"),
+    (mean - std, f"{mean - std = :.2f}"),
+    (mean + std, f"{mean + std = :.2f}"),
 ):
-    anno = dict(text=label, yshift=-10, xshift=5)
+    anno = dict(text=label, yshift=-10, xshift=-5, xanchor="right")
     fig.add_vline(x=x_pos, line=dict(width=1, dash="dash"), annotation=anno)
 
 fig.show()
 
-
-# subsample x
-for trace in fig.data:
-    trace.x = trace.x[::8]
-
-save_fig(fig, f"{FIGS}/{today}-wbm-each-hist.svelte")
-save_fig(fig, f"./figs/{today}-wbm-each-hist.svg", width=1000, height=500)
+save_fig(fig, f"{FIGS}/wbm-each-hist.svelte")
+save_fig(fig, "./figs/wbm-each-hist.svg", width=1000, height=500)
 
 
 # %%
@@ -158,4 +151,4 @@
 
 fig.show()
 
-save_fig(fig, f"{FIGS}/{today}-mp-elemental-ref-energies.svelte")
+save_fig(fig, f"{FIGS}/mp-elemental-ref-energies.svelte")
@@ -25,7 +25,7 @@ The full set of processing steps used to curate the WBM test set from the raw da
 
   <caption>WBM Formation energy distribution. 524 materials outside green dashed lines were discarded.<br />(zoom out on this plot to see discarded samples)</caption>
   <slot name="hist-e-form-per-atom">
-    <img src="./figs/wbm-e-form-per-atom.svg" alt="WBM formation energy histogram indicating outlier cutoffs">
+    <img src="./figs/hist-wbm-e-form-per-atom.svg" alt="WBM formation energy histogram indicating outlier cutoffs">
   </slot>
 
 - apply the [`MaterialsProject2020Compatibility`](https://pymatgen.org/pymatgen.entries.compatibility.html#pymatgen.entries.compatibility.MaterialsProject2020Compatibility) energy correction scheme to the formation energies
@@ -67,8 +67,8 @@ The [paper itself][wbm paper] links to a [Halle University data page](https://td
 
 materialscloud:2021.68 includes a readme file with a description of the dataset, meanings of the summary CSV columns and a Python script for loading the data.
 
-| [Materials Cloud archive](https://archive.materialscloud.org/record/2021.68) | [step 1](https://archive.materialscloud.org/record/file?record_id=840&filename=step_1.json.bz2) | [step 2](https://archive.materialscloud.org/record/file?record_id=840&filename=step_2.json.bz2) | [step 3](https://archive.materialscloud.org/record/file?record_id=840&filename=step_3.json.bz2) | [step 4](https://archive.materialscloud.org/record/file?record_id=840&filename=step_4.json.bz2) | [step 5](https://archive.materialscloud.org/record/file?record_id=840&filename=step_5.json.bz2) | [summary](https://archive.materialscloud.org/record/file?record_id=840&filename=summary.txt.bz2) | [readme](https://archive.materialscloud.org/record/file?record_id=840&filename=README.txt) |
-| ---------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------ |
+| [Materials Cloud](https://archive.materialscloud.org/record/2021.68) | [step 1](https://archive.materialscloud.org/record/file?record_id=840&filename=step_1.json.bz2) | [step 2](https://archive.materialscloud.org/record/file?record_id=840&filename=step_2.json.bz2) | [step 3](https://archive.materialscloud.org/record/file?record_id=840&filename=step_3.json.bz2) | [step 4](https://archive.materialscloud.org/record/file?record_id=840&filename=step_4.json.bz2) | [step 5](https://archive.materialscloud.org/record/file?record_id=840&filename=step_5.json.bz2) | [summary](https://archive.materialscloud.org/record/file?record_id=840&filename=summary.txt.bz2) | [readme](https://archive.materialscloud.org/record/file?record_id=840&filename=README.txt) |
+| -------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------ |
 
 [wbm paper]: https://nature.com/articles/s41524-020-00481-6
 
@@ -79,13 +79,13 @@ The WBM test set and even more so the MP training set are heavily oxide dominate
 Element counts for WBM test set consisting of 256,963 WBM `ComputedStructureEntries`
 
 <slot name="wbm-elements-heatmap">
-  <img src="./figs/2023-01-08-wbm-elements.svg" alt="Periodic table log heatmap of WBM elements">
+  <img src="./figs/wbm-elements.svg" alt="Periodic table log heatmap of WBM elements">
 </slot>
 
 Element counts for MP training set consisting of 146,323 `ComputedStructureEntries`
 
 <slot name="mp-elements-heatmap">
-  <img src="./figs/2023-01-08-mp-elements.svg" alt="Periodic table log heatmap of MP elements">
+  <img src="./figs/mp-elements.svg" alt="Periodic table log heatmap of MP elements">
 </slot>
 
 ## 🎯 &thinsp; Target Distribution
 
@@ -25,5 +25,6 @@
 # load docs, repo, package URLs from package.json
 with open(f"{ROOT}/site/package.json") as file:
     pkg = json.load(file)
-    pypi_keys_to_npm = dict(Docs="homepage", Repo="repository", Package="package")
-    URLs = {key: pkg[val] for key, val in pypi_keys_to_npm.items()}
+
+pypi_keys_to_npm = dict(Docs="homepage", Repo="repository", Package="package")
+URLs = {key: pkg[val] for key, val in pypi_keys_to_npm.items()}
@@ -206,8 +206,8 @@ def load_df_wbm_preds(
 
     dfs: dict[str, pd.DataFrame] = {}
 
-    for model_name in (bar := tqdm(models, disable=not pbar)):
-        bar.set_description(model_name)
+    for model_name in (bar := tqdm(models, disable=not pbar, desc="Loading preds")):
+        bar.set_postfix_str(model_name)
         pattern = f"models/{PRED_FILENAMES[model_name]}"
         df = glob_to_df(pattern, pbar=False, **kwargs).set_index(id_col)
         dfs[model_name] = df