Skip to content

Commit 467d777

Browse files
committed
change scatter_hull_dist_models colorscale (turbo->agsunset) to improve outlier visibility on dark bg
1 parent 1f69374 commit 467d777

12 files changed

+74
-60
lines changed

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ repos:
4242
stages: [commit, commit-msg]
4343
exclude_types: [csv, json, svg]
4444
exclude: ^(.+references.yaml|site/src/figs/.+)$
45-
args: [--ignore-words-list, "nd,te,fpr"]
45+
args: [--ignore-words-list, "nd,te,fpr", --check-filenames]
4646

4747
- repo: https://github.com/pre-commit/mirrors-prettier
4848
rev: v3.0.3

models/chgnet/analyze_chgnet.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,14 @@
2323

2424

2525
# %%
26-
df_chgnet = pd.read_csv(PRED_FILES.CHGNet)
27-
df_chgnet = df_chgnet.set_index(id_col).add_suffix("_2000")
28-
df_chgnet_500 = pd.read_csv(PRED_FILES.CHGNet.replace("-06", "-04"))
29-
df_chgnet_500 = df_chgnet_500.set_index(id_col).add_suffix("_500")
30-
df_chgnet[list(df_chgnet_500)] = df_chgnet_500
26+
df_chgnet = df_chgnet_v030 = pd.read_csv(PRED_FILES.CHGNet)
27+
df_chgnet_v020 = pd.read_csv(
28+
f"{module_dir}/2023-03-06-chgnet-0.2.0-wbm-IS2RE.csv.gz", index_col=id_col
29+
)
3130
df_chgnet["formula"] = df_wbm.formula
3231

33-
e_form_2000 = "e_form_per_atom_chgnet_2000"
34-
e_form_500 = "e_form_per_atom_chgnet_500"
32+
e_form_2000 = "e_form_per_atom_chgnet_relax_steps_2000"
33+
e_form_500 = "e_form_per_atom_chgnet_relax_steps_500"
3534

3635
min_e_diff = 0.1
3736
# structures with smaller energy after longer relaxation need many steps

models/chgnet/metadata.yml

+4
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ authors:
1313
- name: KyuJung Jun
1414
affiliation: UC Berkeley
1515
orcid: https://orcid.org/0000-0003-1974-028X
16+
- name: Janosh Riebesell
17+
affiliation: University of Cambridge, Lawrence Berkeley National Laboratory
18+
19+
orcid: https://orcid.org/0000-0001-5233-3462
1620
- name: Kevin Han
1721
affiliation: UC Berkeley
1822
orcid: https://orcid.org/0000-0002-4028-2108

models/chgnet/test_chgnet.py

+1
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
max_steps=max_steps,
8383
fmax=fmax,
8484
device=device,
85+
trainable_params=chgnet.n_params,
8586
)
8687

8788
run_name = f"{job_name}-{slurm_array_task_id}"

models/m3gnet/test_m3gnet.py

+11-9
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@
2929

3030
task_type = "IS2RE" # "RS2RE"
3131
module_dir = os.path.dirname(__file__)
32-
# direct: cluster sampling, ms: manual sampling
33-
model_type: Literal["orig", "direct", "ms"] = "ms"
32+
# direct: DIRECT cluster sampling, manual-sampling: manual sampling (MS)
33+
model_type: Literal["orig", "direct", "manual-sampling"] = "orig"
3434
# set large job array size for smaller data splits and faster testing/debugging
3535
slurm_array_task_count = 100
3636
job_name = f"m3gnet-{model_type}-wbm-{task_type}"
@@ -74,26 +74,28 @@
7474
pd.read_json(data_path).set_index("material_id"), slurm_array_task_count
7575
)[slurm_array_task_id - 1]
7676

77+
checkpoint = None
78+
if model_type == "direct":
79+
checkpoint = f"{ROOT}/models/m3gnet/2023-05-26-DI-DFTstrictF10-TTRS-128U-442E"
80+
if model_type == "ms":
81+
checkpoint = f"{ROOT}/models/m3gnet/2023-05-26-MS-DFTstrictF10-128U-154E"
82+
relax_results: dict[str, dict[str, Any]] = {}
83+
m3gnet = Relaxer(potential=checkpoint) # load pre-trained M3GNet model
84+
7785
run_params = dict(
7886
data_path=data_path,
7987
versions={dep: version(dep) for dep in ("m3gnet", "numpy")},
8088
task_type=task_type,
8189
df=dict(shape=str(df_in.shape), columns=", ".join(df_in)),
8290
slurm_vars=slurm_vars,
91+
trainable_params=sum(param.numel() for param in m3gnet.parameters()),
8392
)
8493

8594
run_name = f"{job_name}-{slurm_array_task_id}"
8695
wandb.init(project="matbench-discovery", name=run_name, config=run_params)
8796

8897

8998
# %%
90-
checkpoint = None
91-
if model_type == "direct":
92-
checkpoint = f"{ROOT}/models/m3gnet/2023-05-26-DI-DFTstrictF10-TTRS-128U-442E"
93-
if model_type == "ms":
94-
checkpoint = f"{ROOT}/models/m3gnet/2023-05-26-MS-DFTstrictF10-128U-154E"
95-
m3gnet = Relaxer(potential=checkpoint) # load pre-trained M3GNet model
96-
relax_results: dict[str, dict[str, Any]] = {}
9799
input_col = {"IS2RE": "initial_structure", "RS2RE": "relaxed_structure"}[task_type]
98100

99101
if task_type == "RS2RE":

models/mace/test_mace.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from ase.constraints import ExpCellFilter
1313
from ase.optimize import FIRE, LBFGS
1414
from mace.calculators.mace import MACECalculator
15+
from mace.tools import count_parameters
1516
from pymatgen.core import Structure
1617
from pymatgen.core.trajectory import Trajectory
1718
from pymatgen.io.ase import AseAtomsAdaptor
@@ -25,6 +26,8 @@
2526
__author__ = "Janosh Riebesell"
2627
__date__ = "2023-03-01"
2728

29+
30+
# %%
2831
task_type = "IS2RE" # "RS2RE"
2932
module_dir = os.path.dirname(__file__)
3033
# set large job array size for smaller data splits and faster testing/debugging
@@ -40,15 +43,17 @@
4043
# MACE trained by Yuan Chiang on CHGNet training set
4144
"2023-08-14-mace-yuan-mptrj-04",
4245
"2023-09-03-mace-yuan-mptrj-slower-14-lr-13_run-3",
46+
"2023-10-29-mace-pbenner-mptrj-no-conditional-loss",
4347
][-1]
4448

4549
slurm_vars = slurm_submit(
4650
job_name=job_name,
4751
out_dir=out_dir,
4852
account="matgen",
49-
time="11:55:0",
53+
time="4:55:0",
5054
array=f"1-{slurm_array_task_count}",
51-
slurm_flags="--qos regular --constraint gpu --gpus 1",
55+
# slurm_flags="--qos shared --constraint gpu --gpus 1",
56+
slurm_flags="--qos shared --constraint cpu --mem 16G",
5257
)
5358

5459

@@ -72,11 +77,14 @@
7277
max_steps = 500
7378
force_max = 0.05 # Run until the forces are smaller than this in eV/A
7479
checkpoint = f"{ROOT}/models/mace/checkpoints/{model_name}.model"
80+
mace_calc = MACECalculator(checkpoint, device=device)
7581

7682
df_in: pd.DataFrame = np.array_split(
7783
pd.read_json(data_path).set_index(id_col), slurm_array_task_count
7884
)[slurm_array_task_id - 1]
7985

86+
87+
# %%
8088
run_params = dict(
8189
data_path=data_path,
8290
versions={dep: version(dep) for dep in ("mace", "numpy", "torch")},
@@ -89,14 +97,14 @@
8997
force_max=force_max,
9098
ase_optimizer=ase_optimizer,
9199
device=device,
100+
trainable_params=count_parameters(mace_calc.models[0]),
92101
)
93102

94103
run_name = f"{job_name}-{slurm_array_task_id}"
95104
wandb.init(project="matbench-discovery", name=run_name, config=run_params)
96105

97106

98107
# %%
99-
mace_calc = MACECalculator(checkpoint, device=device, default_dtype="float32")
100108
relax_results: dict[str, dict[str, Any]] = {}
101109
input_col = {"IS2RE": "initial_structure", "RS2RE": "relaxed_structure"}[task_type]
102110

models/wrenformer/analyze_wrenformer.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pandas as pd
77
from aviary.wren.utils import get_isopointal_proto_from_aflow
88
from pymatviz import spacegroup_hist, spacegroup_sunburst
9-
from pymatviz.io import df_to_pdf, df_to_svelte_table, save_fig
9+
from pymatviz.io import df_to_html_table, df_to_pdf, save_fig
1010
from pymatviz.ptable import ptable_heatmap_plotly
1111
from pymatviz.utils import add_identity_line, bin_df_cols
1212

@@ -68,7 +68,7 @@
6868

6969
styler = df_proto_counts.head(10).style.background_gradient(cmap="viridis")
7070

71-
df_to_svelte_table(styler, f"{SITE_FIGS}/proto-counts-{model}-failures.svelte")
71+
df_to_html_table(styler, f"{SITE_FIGS}/proto-counts-{model}-failures.svelte")
7272
df_to_pdf(styler, f"{PDF_FIGS}/proto-counts-{model}-failures.pdf")
7373

7474

readme.md

+1-34
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,3 @@
1-
<script>
2-
import { onMount } from 'svelte'
3-
import all_stats from './site/src/routes/models/model-stats.json'
4-
5-
let best = Object.entries(all_stats).reduce(
6-
(acc, [model, stats]) => {
7-
if (stats.F1 > acc.F1) {
8-
return { model, ...stats }
9-
}
10-
return acc
11-
},
12-
{ model: `CHGNet`, F1: 0.6 }
13-
)
14-
15-
let best_report // HTMLDivElement
16-
onMount(async () => {
17-
if (best_report && best) {
18-
best_report.style.display = `block`
19-
20-
const { default: metadata } = await import(
21-
`$root/models/${best.model.toLowerCase()}/metadata.yml`
22-
)
23-
24-
best = { ...best, ...metadata }
25-
console.log(`best`, best)
26-
}
27-
})
28-
</script>
29-
301
<h1 align="center">
312
<img src="https://github.com/janosh/matbench-discovery/raw/main/site/static/favicon.svg" alt="Logo" width="60px"><br>
323
Matbench Discovery
@@ -48,11 +19,7 @@ Matbench Discovery is an [interactive leaderboard](https://janosh.github.io/matb
4819

4920
So far, we've tested 8 models covering multiple methodologies ranging from random forests with structure fingerprints to graph neural networks, from one-shot predictors to iterative Bayesian optimizers and interatomic potential relaxers.
5021

51-
<div bind:this={best_report} style="display: none;">
52-
53-
We find [{best.model}]({best?.repo}) ([paper]({best?.doi})) to achieve the highest F1 score of {best.F1}, $R^2$ of {best.R2} and a discovery acceleration factor (DAF) of {best.DAF} (meaning a ~{Number(best.DAF).toFixed(0)}x higher rate of stable structures compared to dummy selection in our already enriched search space).
54-
55-
</div>
22+
<slot name="best-report" />
5623

5724
Our results show that ML models have become robust enough to be deployed as triaging steps that allocate compute more effectively in high-throughput DFT relaxations. This work provides valuable insights for anyone looking to build large-scale materials databases.
5825

scripts/model_figs/make_metrics_tables.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
import numpy as np
1010
import pandas as pd
11-
from pymatviz.io import df_to_pdf, df_to_svelte_table
11+
from pymatviz.io import df_to_html_table, df_to_pdf
1212
from sklearn.dummy import DummyClassifier
1313

1414
from matbench_discovery import PDF_FIGS, SITE_FIGS
@@ -156,7 +156,7 @@
156156
table::-webkit-scrollbar {
157157
display: none; /* Safari and Chrome */
158158
}"""
159-
df_to_svelte_table(
159+
df_to_html_table(
160160
styler,
161161
f"{SITE_FIGS}/metrics-table{label}.svelte",
162162
inline_props="class='roomy'",

scripts/model_figs/scatter_hull_dist_models.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,8 @@
150150
range_x=(domain := (-4, 7)),
151151
range_y=domain,
152152
category_orders={facet_col: legend_order},
153-
color_continuous_scale="turbo", # "thermal"
153+
# pick from https://plotly.com/python/builtin-colorscales
154+
color_continuous_scale="agsunset",
154155
)
155156

156157
# manually set colorbar ticks and labels (needed after log1p transform)

site/src/figs/each-scatter-models-5x2.svelte

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

site/src/routes/+page.svelte

+32
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,40 @@
11
<script lang="ts">
22
import MetricsTable from '$figs/metrics-table.svelte'
3+
import type { ModelData } from '$lib'
34
import Readme from '$root/readme.md'
5+
import { onMount } from 'svelte'
6+
import all_stats from './models/model-stats.json'
7+
8+
let best_model = Object.entries(all_stats).reduce((current, [model_name, stats]) => {
9+
if (!current?.F1 || stats.F1 > current.F1) {
10+
return { model_name, ...stats }
11+
}
12+
return current
13+
}, {}) as ModelData
14+
15+
const metadata = import.meta.glob(`$root/models/**/metadata.yml`, {
16+
eager: true,
17+
import: `default`,
18+
}) as Record<string, ModelData | ModelData[]>
19+
20+
onMount(async () => {
21+
if (best_model) {
22+
const md = metadata[`../models/${best_model.model_name.toLowerCase()}/metadata.yml`]
23+
best_model = { ...best_model, ...md }
24+
}
25+
})
426
</script>
527

628
<Readme>
29+
<div slot="best-report">
30+
{#if best_model}
31+
{@const { model_name, F1, R2, DAF, repo, doi } = best_model}
32+
We find <a href={repo}>{model_name}</a> (<a href={doi}>paper</a>) to achieve the
33+
highest F1 score of {F1}, R<sup>2</sup> of {R2}
34+
and a discovery acceleration factor (DAF) of {DAF}
35+
(meaning a ~{Number(DAF).toFixed(0)}x higher rate of stable structures compared to
36+
dummy selection in our already enriched search space).
37+
{/if}
38+
</div>
739
<MetricsTable slot="metrics-table" />
840
</Readme>

0 commit comments

Comments
 (0)