Skip to content

Commit 79c8042

Browse files
committed
fix outdated numbers in data/wbm/readme.md
rename figs/wbm-each-hist.(svg|svelte) to figs/hist-wbm-hull-dist.(svg|svelte)
1 parent fc20ccf commit 79c8042

File tree

6 files changed

+53
-69
lines changed

6 files changed

+53
-69
lines changed

data/wbm/eda.py

+38-54
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
)
1313
from pymatviz.utils import save_fig
1414

15-
from matbench_discovery import FIGS, PDF_FIGS, ROOT, STABILITY_THRESHOLD, today
15+
from matbench_discovery import FIGS, PDF_FIGS, ROOT, STABILITY_THRESHOLD
1616
from matbench_discovery import plots as plots
1717
from matbench_discovery.data import DATA_FILES, df_wbm
1818
from matbench_discovery.energy import mp_elem_reference_entries
@@ -35,14 +35,25 @@
3535

3636

3737
# %%
38-
for count_mode in ["occurrence", "composition"]:
39-
wbm_elem_counts = count_elements(df_wbm.formula, count_mode=count_mode).astype(int)
38+
wbm_occu_counts = count_elements(df_wbm.formula, count_mode="occurrence").astype(int)
39+
wbm_comp_counts = count_elements(df_wbm.formula, count_mode="composition")
40+
41+
mp_occu_counts = count_elements(df_mp.formula_pretty, count_mode="occurrence").astype(
42+
int
43+
)
44+
mp_comp_counts = count_elements(df_mp.formula_pretty, count_mode="composition")
45+
46+
all_counts = (
47+
("wbm", "occurrence", wbm_occu_counts),
48+
("wbm", "composition", wbm_comp_counts),
49+
("mp", "occurrence", mp_occu_counts),
50+
("mp", "composition", mp_comp_counts),
51+
)
4052

41-
wbm_elem_counts.to_json(f"{about_data_page}/wbm-element-counts-{count_mode}.json")
42-
mp_elem_counts = count_elements(df_mp.formula_pretty, count_mode=count_mode).astype(
43-
int
44-
)
45-
mp_elem_counts.to_json(f"{about_data_page}/mp-element-counts-{count_mode}.json")
53+
54+
# %%
55+
for dataset, count_mode, elem_counts in all_counts:
56+
elem_counts.to_json(f"{about_data_page}/{dataset}-element-counts-{count_mode}.json")
4657

4758

4859
# %% export element counts by WBM step to JSON
@@ -64,51 +75,21 @@
6475

6576

6677
# %%
67-
wbm_fig = ptable_heatmap_plotly(
68-
wbm_elem_counts.drop("Xe"),
69-
log=True,
70-
colorscale="RdBu",
71-
hover_props=dict(atomic_number="atomic number"),
72-
hover_data=wbm_elem_counts,
73-
)
74-
75-
title = "WBM Elements"
76-
wbm_fig.update_layout(
77-
title=dict(text=title, x=0.35, y=0.9, font_size=20),
78-
xaxis=dict(fixedrange=True),
79-
yaxis=dict(fixedrange=True),
80-
paper_bgcolor="rgba(0,0,0,0)",
81-
)
82-
wbm_fig.show()
83-
84-
85-
# %%
86-
wbm_fig.write_image(f"{module_dir}/figs/wbm-elements.svg", width=1000, height=500)
87-
# save_fig(wbm_fig, f"{FIGS}/wbm-elements.svelte")
88-
89-
90-
# %%
91-
mp_fig = ptable_heatmap_plotly(
92-
mp_elem_counts[mp_elem_counts > 1],
93-
log=True,
94-
colorscale="RdBu",
95-
hover_props=dict(atomic_number="atomic number"),
96-
hover_data=mp_elem_counts,
97-
)
98-
99-
title = "MP Elements"
100-
mp_fig.update_layout(
101-
title=dict(text=title, x=0.35, y=0.9, font_size=20),
102-
xaxis=dict(fixedrange=True),
103-
yaxis=dict(fixedrange=True),
104-
paper_bgcolor="rgba(0,0,0,0)",
105-
)
106-
mp_fig.show()
107-
78+
for dataset, count_mode, elem_counts in all_counts:
79+
ptable = ptable_heatmap_plotly(
80+
elem_counts.drop("Xe")[elem_counts > 1],
81+
font_size=11,
82+
color_bar=dict(title=dict(text=f"WBM {count_mode} counts", font_size=24)),
83+
# log=True,
84+
# colorscale="cividis",
85+
hover_props=dict(atomic_number="atomic number"),
86+
hover_data=wbm_occu_counts,
87+
)
10888

109-
# %%
110-
mp_fig.write_image(f"{module_dir}/figs/{today}-mp-elements.svg", width=1000, height=500)
111-
# save_fig(mp_fig, f"{FIGS}/mp-elements.svelte")
89+
ptable.layout.margin = dict(l=0, r=0, b=0, t=0)
90+
ptable.show()
91+
# save_fig(ptable, f"{module_dir}/figs/wbm-elements.svg", width=1000, height=500)
92+
save_fig(ptable, f"{PDF_FIGS}/{dataset}-element-{count_mode}-counts.pdf")
11293

11394

11495
# %% histogram of energy above MP convex hull for WBM
@@ -148,8 +129,9 @@
148129

149130
fig.show()
150131

151-
save_fig(fig, f"{FIGS}/wbm-each-hist.svelte")
152-
save_fig(fig, "./figs/wbm-each-hist.svg", width=1000, height=500)
132+
# save_fig(fig, f"{FIGS}/hist-wbm-hull-dist.svelte")
133+
# save_fig(fig, "./figs/hist-wbm-hull-dist.svg", width=1000, height=500)
134+
save_fig(fig, f"{PDF_FIGS}/hist-wbm-hull-dist.pdf")
153135

154136

155137
# %%
@@ -254,6 +236,7 @@
254236
# %%
255237
fig = spacegroup_sunburst(df_wbm[spg_col], width=350, height=350, show_counts="percent")
256238
fig.layout.title.update(text="WBM Spacegroup Sunburst", x=0.5, font_size=14)
239+
fig.layout.margin = dict(l=0, r=0, t=30, b=0)
257240
fig.show()
258241
save_fig(fig, f"{FIGS}/spacegroup-sunburst-wbm.svelte")
259242
save_fig(fig, f"{PDF_FIGS}/spacegroup-sunburst-wbm.pdf")
@@ -262,6 +245,7 @@
262245
# %%
263246
fig = spacegroup_sunburst(df_mp[spg_col], width=350, height=350, show_counts="percent")
264247
fig.layout.title.update(text="MP Spacegroup Sunburst", x=0.5, font_size=14)
248+
fig.layout.margin = dict(l=0, r=0, t=30, b=0)
265249
fig.show()
266250
save_fig(fig, f"{FIGS}/spacegroup-sunburst-mp.svelte")
267251
save_fig(fig, f"{PDF_FIGS}/spacegroup-sunburst-mp.pdf")
File renamed without changes.

data/wbm/readme.md

+5-5
Original file line numberDiff line numberDiff line change
@@ -74,16 +74,16 @@ materialscloud:2021.68 includes a readme file with a description of the dataset,
7474

7575
## 🎯   Target Distribution
7676

77-
The WBM test set has an energy above the MP convex hull distribution with **mean ± std = 0.02 ± 0.25 eV/atom**.
77+
Below is a histogram of hull distances of all WBM test set structures with respect to the Materials Project convex hull. This is the target distribution that the models are asked to predict.
7878

7979
The dummy MAE of always predicting the test set mean is **0.12 eV/atom**.
8080

81-
The number of stable materials (according to the MP convex hull which is spanned by the training data the models have access to) is **97k** out of **257k**, resulting in a dummy stability hit rate of **37%**.
81+
The number of stable materials (according to the MP convex hull which is spanned by the training data the models have access to) is **43k** out of **257k**, resulting in a dummy stability hit rate of **16.7%**.
8282

8383
> Note: [According to the authors](https://www.nature.com/articles/s41524-020-00481-6#Sec2), the stability rate w.r.t. the more complete hull constructed from the combined train and test set (MP + WBM) for the first 3 rounds of elemental substitution is 18,479 out of 189,981 crystals ($\approx$ 9.7%).
8484
85-
<slot name="wbm-each-hist">
86-
<img src="./figs/wbm-each-hist.svg" alt="WBM energy above MP convex hull distribution">
85+
<slot name="hist-wbm-hull-dist">
86+
<img src="./figs/hist-wbm-hull-dist.svg" alt="WBM energy above MP convex hull distribution">
8787
</slot>
8888

8989
## 🧪 &thinsp; Chemical Diversity
@@ -104,6 +104,6 @@ Element counts for MP training set consisting of 146,323 `ComputedStructureEntri
104104

105105
## 📊 &thinsp; Symmetry Statistics
106106

107-
Both the MP training and WBM test set have good coverage of all 7 crystal systems, triclinic crystals being the only notable exception at just 1% prevalence in WBM but still well represented in MP (15%). In MP, monoclinic (23%) and orthorhombic (21%) are most prevalent. In WBM, orthorhombic and tetragonal each make up 20%. Combined with the higher share of cubic structures in WBM (19% vs 14%), WBM structures have overall higher symmetry. This should benefit a model like Wrenformer reliant on symmetries to encode coarse-grained structural features. See [SI](/si#spacegroup-prevalence-in-wrenformer-failure-cases) for a failure case of this featurization.
107+
These sunburst diagrams show the spacegroup distribution of MP on the left and WBM on the right. Both have good coverage of all 7 crystal systems, the only exception being triclinic crystals which are just 1% of WBM but well represented in MP (15%). The 3 largest systems in MP are monoclinic, orthorhombic and triclinic vs orthorhombic, tetragonal and cubic in WBM. So WBM structures have overall higher symmetry which can benefit some models more than others. Wrenformer in particular uses symmetries as a coarse-grained description of the underlying structure. Its representation basically degrades to composition-only on symmetry-less P1 structures. Given this spacegroup distribution, it should fare well on the WBM test set. The fact that Wrenformer is still outperformed by all interatomic potentials and some single-shot GNNs indicates the underlying methodology is unable to compete. See [SI](/si#spacegroup-prevalence-in-wrenformer-failure-cases) for a specific Wrenformer failure case.
108108

109109
<slot name="spacegroup-sunbursts" />

matbench_discovery/plots.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -887,24 +887,24 @@ def df_to_pdf(
887887
except ImportError as exc:
888888
raise ImportError(
889889
"pdfkit not installed\nrun pip install pdfkit && brew install "
890-
"homebrew/cask/wkhtmltopdf"
890+
"homebrew/cask/wkhtmltopdf\n(brew is macOS only, use apt un linux))"
891891
) from exc
892892

893+
pdfkit.from_string(styler.to_html(**kwargs), file_path)
894+
if not crop:
895+
return
893896
try:
894897
# needed to auto-crop large white margins from PDF
895898
# pip install pdfCropMargins
896899
from pdfCropMargins import crop as crop_pdf
897-
except ImportError as exc:
898-
raise ImportError(
899-
"pdfCropMargins not installed\nrun pip install pdfCropMargins.\n"
900-
) from exc
901900

902-
pdfkit.from_string(styler.to_html(**kwargs), file_path)
903-
try:
904901
# Remove PDF margins
905902
cropped_file_path, _exit_code, _stdout, _stderr = crop_pdf(
906903
["--percentRetain", "0", file_path]
907904
)
908905
os.replace(cropped_file_path, file_path)
906+
except ImportError as exc:
907+
msg = "pdfCropMargins not installed\nrun pip install pdfCropMargins"
908+
raise ImportError(msg) from exc
909909
except Exception as exc:
910910
raise RuntimeError("Error cropping PDF margins") from exc
File renamed without changes.

site/src/routes/about-the-data/+page.svelte

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
<script lang="ts">
22
import { browser } from '$app/environment'
33
import FormEnergyHist from '$figs/hist-wbm-e-form-per-atom.svelte'
4+
import HistWbmHullDist from '$figs/hist-wbm-hull-dist.svelte'
45
import SpacegroupSunburstMp from '$figs/spacegroup-sunburst-mp.svelte'
56
import SpacegroupSunburstWbm from '$figs/spacegroup-sunburst-wbm.svelte'
6-
import WbmEachHist from '$figs/wbm-each-hist.svelte'
77
import { PtableInset } from '$lib'
88
import DataReadme from '$root/data/wbm/readme.md'
99
import type { ChemicalElement } from 'elementari'
@@ -101,9 +101,9 @@
101101
3).
102102
</p>
103103
</svelte:fragment>
104-
<svelte:fragment slot="wbm-each-hist">
104+
<svelte:fragment slot="hist-wbm-hull-dist">
105105
{#if browser}
106-
<WbmEachHist />
106+
<HistWbmHullDist />
107107
{/if}
108108
</svelte:fragment>
109109
<div

0 commit comments

Comments
 (0)