Skip to content

Commit 0bf930b

Browse files
kyonofx, CompRhys, janosh
authored
Add FAIRChem eSEN models (#226)
* fairchem eSEN model prediction results
* raise for all missing models
* minor editing
* minor style
* add geo_opt metrics and reupload all pred files to own figshare articles
* fix #214
* upload esen discovery metrics
* fix kSRME paths
* rename join to fix pre-commit
* rename files to fix pre-commit
* upload kappa, change script names
* fix figshare directories
* Revert "raise for all missing models" (breaks tests)
  This reverts commit cd27049.
* test_esen_kappa.py rename SRMERunner to KappaSRMERunner and import from matbench_discovery.phonons
  - data loading now uses DataFiles
* refactor all test_<model>_kappa.py scripts to standardize material IDs and output column names
* rename (esen->eSEN)-30m-(mp|oam).yml
* add eSEN energy parity plots and per-element EACH errors
* fix eSEN YAML paths in Model enum
* Update test_esen_kappa.py
* upload new kappa_SRME files to figshare

---------

Co-authored-by: Rhys Goodall <[email protected]>
Co-authored-by: Janosh Riebesell <[email protected]>
1 parent 1d28e07 commit 0bf930b

23 files changed

+1114
-86
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,4 @@ site/src/routes/api/*.md
4242

4343
# large files
4444
data/*-models-geo-opt-analysis-symprec=*.csv.gz
45+
.cursor

data/mp/build_phase_diagram.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import pickle
1010

1111
import pandas as pd
12-
import pymatviz
1312
import pymatviz as pmv
1413
from pymatgen.analysis.phase_diagram import PatchedPhaseDiagram
1514
from pymatgen.entries.compatibility import MaterialsProject2020Compatibility
@@ -124,7 +123,7 @@
124123

125124

126125
# make sure get_form_energy_per_atom() reproduces MP formation energies
127-
ax = pymatviz.density_scatter(df_mp[Key.form_energy], df_mp[e_form_us])
126+
ax = pmv.density_scatter_plotly(df_mp[Key.form_energy], df_mp[e_form_us])
128127
ax.set(
129128
title="MP Formation Energy Comparison",
130129
xlabel="MP Formation Energy (eV/atom)",

matbench_discovery/enums.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,8 @@ class Model(Files, base_dir=f"{ROOT}/models"):
310310
# FAIR-Chem
311311
eqv2_s_dens = auto(), "eqV2/eqV2-s-dens-mp.yml"
312312
eqv2_m = auto(), "eqV2/eqV2-m-omat-salex-mp.yml"
313+
esen_30m_mp = auto(), "eSEN/eSEN-30m-mp.yml"
314+
esen_30m_oam = auto(), "eSEN/eSEN-30m-oam.yml"
313315

314316
# GRACE: https://arxiv.org/abs/2311.16326v2
315317
grace_2l_mptrj = auto(), "grace/grace-2l-mptrj.yml"

models/alphanet/test_alphanet_kappa.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -109,20 +109,17 @@
109109
for atoms in tqdm_bar:
110110
mat_id = atoms.info.get(Key.mat_id, f"id-{len(kappa_results)}")
111111
init_info = deepcopy(atoms.info)
112-
mat_name = atoms.info.get("name", "unknown")
113-
112+
formula = atoms.info.get("name", "unknown")
114113
spg_num = MoyoDataset(MoyoAdapter.from_atoms(atoms)).number
115-
mat_desc = f"{mat_name}-{spg_num}"
116-
117114
info_dict = {
118-
"desc": mat_desc,
119-
"name": mat_name,
120-
"initial_space_group_number": spg_num,
115+
Key.desc: mat_id,
116+
Key.formula: formula,
117+
Key.spg_num: spg_num,
121118
"errors": [],
122119
"error_traceback": [],
123120
}
124121

125-
tqdm_bar.set_postfix_str(mat_desc, refresh=True)
122+
tqdm_bar.set_postfix_str(mat_id, refresh=True)
126123

127124
# Initialize relax_dict to avoid "possibly unbound" errors
128125
relax_dict = {
@@ -148,7 +145,7 @@
148145

149146
reached_max_steps = optimizer.step >= max_steps
150147
if reached_max_steps:
151-
print(f"Material {mat_desc=} reached {max_steps=} during relaxation.")
148+
print(f"Material {mat_id=} reached {max_steps=} during relaxation.")
152149

153150
max_stress = atoms.get_stress().reshape((2, 3), order="C").max(axis=1)
154151
atoms.calc = None
@@ -166,7 +163,7 @@
166163
}
167164

168165
except Exception as exc:
169-
warnings.warn(f"Failed to relax {mat_name=}, {mat_id=}: {exc!r}", stacklevel=2)
166+
warnings.warn(f"Failed to relax {formula=}, {mat_id=}: {exc!r}", stacklevel=2)
170167
traceback.print_exc()
171168
info_dict["errors"].append(f"RelaxError: {exc!r}")
172169
info_dict["error_traceback"].append(traceback.format_exc())
@@ -221,8 +218,7 @@
221218
if not ltc_condition:
222219
kappa_results[mat_id] = info_dict | relax_dict | freqs_dict
223220
warnings.warn(
224-
f"Material {mat_desc} imaginary frequencies or broken symmetry.",
225-
stacklevel=2,
221+
f"{mat_id=} has imaginary frequencies or broken symmetry", stacklevel=2
226222
)
227223
continue
228224

models/deepmd/dpa3-v2-mptrj.yml

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ metrics:
124124
pred_col: e_form_per_atom_dp
125125
full_test_set:
126126
F1: 0.774 # fraction
127-
DAF: 4.25 # dimensionless
127+
DAF: 4.249 # dimensionless
128128
Precision: 0.729 # fraction
129129
Recall: 0.825 # fraction
130130
Accuracy: 0.917 # fraction
@@ -133,20 +133,20 @@ metrics:
133133
TNR: 0.936 # fraction
134134
FNR: 0.175 # fraction
135135
TP: 36393.0 # count
136-
FP: 13519.0 # count
137-
TN: 199352.0 # count
136+
FP: 13518.0 # count
137+
TN: 199353.0 # count
138138
FN: 7699.0 # count
139139
MAE: 0.038 # eV/atom
140-
RMSE: 0.082 # eV/atom
141-
R2: 0.796 # dimensionless
142-
missing_preds: 0 # count
140+
RMSE: 0.08 # eV/atom
141+
R2: 0.801 # dimensionless
142+
missing_preds: 2 # count
143143
missing_percent: 0.00% # fraction
144144
most_stable_10k:
145-
F1: 0.980 # fraction
146-
DAF: 6.280 # dimensionless
147-
Precision: 0.960 # fraction
145+
F1: 0.98 # fraction
146+
DAF: 6.28 # dimensionless
147+
Precision: 0.96 # fraction
148148
Recall: 1.0 # fraction
149-
Accuracy: 0.960 # fraction
149+
Accuracy: 0.96 # fraction
150150
TPR: 1.0 # fraction
151151
FPR: 1.0 # fraction
152152
TNR: 0.0 # fraction
@@ -162,7 +162,7 @@ metrics:
162162
missing_percent: 0.00% # fraction
163163
unique_prototypes:
164164
F1: 0.786 # fraction
165-
DAF: 4.760 # dimensionless
165+
DAF: 4.822 # dimensionless
166166
Precision: 0.737 # fraction
167167
Recall: 0.841 # fraction
168168
Accuracy: 0.929 # fraction

models/deepmd/dpa3-v2-openlam.yml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -135,13 +135,13 @@ metrics:
135135
TNR: 0.969 # fraction
136136
FNR: 0.115 # fraction
137137
TP: 39030.0 # count
138-
FP: 6676.0 # count
139-
TN: 206195.0 # count
138+
FP: 6675.0 # count
139+
TN: 206196.0 # count
140140
FN: 5062.0 # count
141141
MAE: 0.022 # eV/atom
142-
RMSE: 0.068 # eV/atom
143-
R2: 0.857 # dimensionless
144-
missing_preds: 0 # count
142+
RMSE: 0.067 # eV/atom
143+
R2: 0.863 # dimensionless
144+
missing_preds: 2 # count
145145
missing_percent: 0.00% # fraction
146146
most_stable_10k:
147147
F1: 0.986 # fraction
@@ -163,11 +163,11 @@ metrics:
163163
missing_preds: 0 # count
164164
missing_percent: 0.00% # fraction
165165
unique_prototypes:
166-
F1: 0.890 # fraction
166+
F1: 0.89 # fraction
167167
DAF: 5.747 # dimensionless
168168
Precision: 0.879 # fraction
169169
Recall: 0.902 # fraction
170-
Accuracy: 0.965 # fraction
170+
Accuracy: 0.966 # fraction
171171
TPR: 0.902 # fraction
172172
FPR: 0.023 # fraction
173173
TNR: 0.977 # fraction

models/deepmd/test_dpa3_kappa.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -103,20 +103,18 @@
103103
for atoms in tqdm_bar:
104104
mat_id = atoms.info.get(Key.mat_id, f"id-{len(kappa_results)}")
105105
init_info = deepcopy(atoms.info)
106-
mat_name = atoms.info.get("name", "unknown")
106+
formula = atoms.info.get("name", "unknown")
107107

108108
spg_num = MoyoDataset(MoyoAdapter.from_atoms(atoms)).number
109-
mat_desc = f"{mat_name}-{spg_num}"
110-
111109
info_dict = {
112-
"desc": mat_desc,
113-
"name": mat_name,
114-
"initial_space_group_number": spg_num,
110+
Key.desc: mat_id,
111+
Key.formula: formula,
112+
Key.spg_num: spg_num,
115113
"errors": [],
116114
"error_traceback": [],
117115
}
118116

119-
tqdm_bar.set_postfix_str(mat_desc, refresh=True)
117+
tqdm_bar.set_postfix_str(mat_id, refresh=True)
120118

121119
# Initialize relax_dict to avoid "possibly unbound" errors
122120
relax_dict = {
@@ -142,7 +140,7 @@
142140

143141
reached_max_steps = optimizer.step >= max_steps
144142
if reached_max_steps:
145-
print(f"Material {mat_desc=} reached {max_steps=} during relaxation.")
143+
print(f"{mat_id=} reached {max_steps=} during relaxation.")
146144

147145
max_stress = atoms.get_stress().reshape((2, 3), order="C").max(axis=1)
148146
atoms.calc = None
@@ -160,7 +158,7 @@
160158
}
161159

162160
except Exception as exc:
163-
warnings.warn(f"Failed to relax {mat_name=}, {mat_id=}: {exc!r}", stacklevel=2)
161+
warnings.warn(f"Failed to relax {formula=}, {mat_id=}: {exc!r}", stacklevel=2)
164162
traceback.print_exc()
165163
info_dict["errors"].append(f"RelaxError: {exc!r}")
166164
info_dict["error_traceback"].append(traceback.format_exc())
@@ -215,8 +213,7 @@
215213
if not ltc_condition:
216214
kappa_results[mat_id] = info_dict | relax_dict | freqs_dict
217215
warnings.warn(
218-
f"Material {mat_desc} imaginary frequencies or broken symmetry.",
219-
stacklevel=2,
216+
f"{mat_id=} has imaginary frequencies or broken symmetry", stacklevel=2
220217
)
221218
continue
222219

models/eSEN/eSEN-30m-mp.yml

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
model_name: eSEN-30M-MP
2+
model_key: esen-30m-mp
3+
model_version: v2025.03.17
4+
matbench_discovery_version: 1.3.1
5+
date_added: "2025-03-17"
6+
date_published: "2025-03-17"
7+
authors:
8+
- name: Xiang Fu
9+
affiliation: FAIR at Meta
10+
11+
orcid: https://orcid.org/0000-0001-7480-6312
12+
github: https://github.com/kyonofx
13+
corresponding: true
14+
- name: Brandon M. Wood
15+
affiliation: FAIR at Meta
16+
- name: Luis Barroso-Luque
17+
affiliation: FAIR at Meta
18+
- name: Daniel S. Levine
19+
affiliation: FAIR at Meta
20+
- name: Meng Gao
21+
affiliation: FAIR at Meta
22+
- name: Misko Dzamba
23+
affiliation: FAIR at Meta
24+
- name: C. Lawrence Zitnick
25+
affiliation: FAIR at Meta
26+
27+
corresponding: true
28+
29+
repo: https://github.com/FAIR-Chem/fairchem
30+
doi: https://doi.org/10.48550/arXiv.2502.12147
31+
paper: https://arxiv.org/abs/2502.12147
32+
url: https://huggingface.co/fairchem/eSEN
33+
pypi: https://pypi.org/project/fairchem-core
34+
pr_url: https://github.com/janosh/matbench-discovery/pull/226
35+
36+
requirements:
37+
fairchem-core: 1.7.0
38+
39+
openness: OSOD
40+
trained_for_benchmark: false
41+
train_task: S2EFS
42+
test_task: IS2RE-SR
43+
targets: EFS_G
44+
model_type: UIP
45+
model_params: 30_085_121
46+
n_estimators: 1
47+
48+
training_set: [MPtrj]
49+
50+
hyperparams:
51+
max_force: 0.02
52+
max_steps: 500
53+
ase_optimizer: FIRE
54+
cell_filter: FrechetCellFilter
55+
56+
notes:
57+
Description: |
58+
equivariant Smooth Energy Network (eSEN), described in https://arxiv.org/abs/2502.12147.
59+
Training: |
60+
Training was done by: (1) 70-epoch direct pre-training on MPTrj with DeNS; (2) 30-epoch conservative fine-tuning on MPTrj without DeNS.
61+
62+
metrics:
63+
phonons:
64+
kappa_103:
65+
κ_SRME: 0.3398
66+
pred_file: models/eSEN/eSEN-30m-mp/2025-03-18-kappa-103-FIRE-dist=0.03-fmax=1e-4-symprec=1e-5.json.gz
67+
pred_file_url: https://figshare.com/files/53090276
68+
geo_opt:
69+
pred_file: models/eSEN/eSEN-30m-mp/2025-03-17-wbm-geo-opt.json.gz
70+
pred_file_url: https://figshare.com/files/53054693
71+
struct_col: pred_structure_esen
72+
symprec=1e-5:
73+
rmsd: 0.0142 # Å
74+
n_sym_ops_mae: 3.3144 # unitless
75+
symmetry_decrease: 0.4281 # fraction
76+
symmetry_match: 0.3859 # fraction
77+
symmetry_increase: 0.1282 # fraction
78+
n_structures: 256963 # count
79+
analysis_file: models/eSEN/eSEN-30m-mp/2025-03-17-wbm-geo-opt-symprec=1e-5-moyo=0.4.2.csv.gz
80+
analysis_file_url: https://figshare.com/files/53054696
81+
symprec=1e-2:
82+
rmsd: 0.0142 # Å
83+
n_sym_ops_mae: 2.521 # unitless
84+
symmetry_decrease: 0.2604 # fraction
85+
symmetry_match: 0.6106 # fraction
86+
symmetry_increase: 0.098 # fraction
87+
n_structures: 256963 # count
88+
analysis_file: models/eSEN/eSEN-30m-mp/2025-03-17-wbm-geo-opt-symprec=1e-2-moyo=0.4.2.csv.gz
89+
analysis_file_url: https://figshare.com/files/53054375
90+
discovery:
91+
pred_file: models/eSEN/eSEN-30m-mp/2025-03-17-wbm-IS2RE.csv.gz
92+
pred_file_url: https://figshare.com/files/53054366
93+
pred_col: pred_e_form_per_atom_esen
94+
full_test_set:
95+
F1: 0.819 # fraction
96+
DAF: 4.616 # dimensionless
97+
Precision: 0.792 # fraction
98+
Recall: 0.849 # fraction
99+
Accuracy: 0.936 # fraction
100+
TPR: 0.849 # fraction
101+
FPR: 0.046 # fraction
102+
TNR: 0.954 # fraction
103+
FNR: 0.151 # fraction
104+
TP: 37419.0 # count
105+
FP: 9825.0 # count
106+
TN: 203046.0 # count
107+
FN: 6673.0 # count
108+
MAE: 0.032 # eV/atom
109+
RMSE: 0.077 # eV/atom
110+
R2: 0.818 # dimensionless
111+
missing_preds: 2 # count
112+
missing_percent: 0.00% # fraction
113+
unique_prototypes:
114+
F1: 0.831 # fraction
115+
DAF: 5.26 # dimensionless
116+
Precision: 0.804 # fraction
117+
Recall: 0.861 # fraction
118+
Accuracy: 0.946 # fraction
119+
TPR: 0.861 # fraction
120+
FPR: 0.038 # fraction
121+
TNR: 0.962 # fraction
122+
FNR: 0.139 # fraction
123+
TP: 28722.0 # count
124+
FP: 7000.0 # count
125+
TN: 175114.0 # count
126+
FN: 4652.0 # count
127+
MAE: 0.033 # eV/atom
128+
RMSE: 0.078 # eV/atom
129+
R2: 0.822 # dimensionless
130+
missing_preds: 0 # count
131+
missing_percent: 0.00% # fraction
132+
most_stable_10k:
133+
F1: 0.978 # fraction
134+
DAF: 6.261 # dimensionless
135+
Precision: 0.957 # fraction
136+
Recall: 1.0 # fraction
137+
Accuracy: 0.957 # fraction
138+
TPR: 1.0 # fraction
139+
FPR: 1.0 # fraction
140+
TNR: 0.0 # fraction
141+
FNR: 0.0 # fraction
142+
TP: 9572.0 # count
143+
FP: 428.0 # count
144+
TN: 0.0 # count
145+
FN: 0.0 # count
146+
MAE: 0.035 # eV/atom
147+
RMSE: 0.111 # eV/atom
148+
R2: 0.755 # dimensionless
149+
missing_preds: 0 # count
150+
missing_percent: 0.00% # fraction

0 commit comments

Comments
 (0)