Skip to content

Commit eb11ab0

Browse files
committed
add join_mace_results.py
add props n_authors and first_name_mode to References.svelte
1 parent b06c567 commit eb11ab0

11 files changed

+198
-22
lines changed

models/bowsr/join_bowsr_results.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@
6565

6666

6767
# %%
68-
out_path = f"{module_dir}/{glob_pattern.split('/*')[0]}"
68+
out_path = file_paths[0].rsplit("/", 1)[0]
6969
df_bowsr = df_bowsr.round(4)
7070
# save energy and formation energy as fast-loading CSV
7171
df_bowsr.select_dtypes("number").to_csv(f"{out_path}.csv")

models/bowsr/test_bowsr.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@
126126
try:
127127
struct_bowsr, energy_bowsr = optimizer.get_optimized_structure_and_energy()
128128
except Exception as exc:
129-
print(f"Failed to relax {material_id}: {exc}")
129+
print(f"Failed to relax {material_id}: {exc!r}")
130130

131131
results = {
132132
f"e_form_per_atom_bowsr_{energy_model}": model.predict_energy(struct_bowsr),

models/chgnet/join_chgnet_results.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,11 @@
6363

6464

6565
# %%
66-
out_path = f"{module_dir}/{glob_pattern.split('/*')[0]}"
66+
out_path = file_paths[0].rsplit("/", 1)[0]
6767
df_chgnet = df_chgnet.round(4)
6868
df_chgnet.select_dtypes("number").to_csv(f"{out_path}.csv.gz")
6969
df_chgnet.reset_index().to_json(f"{out_path}.json.gz", default_handler=as_dict_handler)
7070

71-
# in_path = f"{module_dir}/2023-03-04-chgnet-wbm-IS2RE.json.gz"
72-
# df_chgnet = pd.read_csv(in_path.replace(".json.gz", ".csv")).set_index("material_id")
73-
# df_chgnet = pd.read_json(in_path).set_index("material_id")
71+
# in_path = f"{module_dir}/2023-03-04-chgnet-wbm-IS2RE"
72+
# df_chgnet = pd.read_csv(f"{in_path}.csv.gz").set_index("material_id")
73+
# df_chgnet = pd.read_json(f"{in_path}.json.gz").set_index("material_id")

models/chgnet/test_chgnet.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@
104104
e_pred_col: relax_result["trajectory"].energies[-1],
105105
}
106106
except Exception as exc:
107-
print(f"Failed to relax {material_id}: {exc}")
107+
print(f"Failed to relax {material_id}: {exc!r}")
108108

109109

110110
# %%

models/m3gnet/join_m3gnet_results.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,11 @@
6969
cse: ComputedStructureEntry
7070
for row in tqdm(df_m3gnet.itertuples(), total=len(df_m3gnet)):
7171
mat_id, struct_dict, m3gnet_energy, *_ = row
72-
m3gnet_struct = Structure.from_dict(struct_dict)
73-
df_m3gnet.at[mat_id, struct_col] = m3gnet_struct # noqa: PD008
72+
mlip_struct = Structure.from_dict(struct_dict)
73+
df_m3gnet.at[mat_id, struct_col] = mlip_struct # noqa: PD008
7474
cse = df_cse.loc[mat_id, "cse"]
7575
cse._energy = m3gnet_energy # cse._energy is the uncorrected energy
76-
cse._structure = m3gnet_struct
76+
cse._structure = mlip_struct
7777
df_m3gnet.loc[mat_id, "cse"] = cse
7878

7979

@@ -91,12 +91,12 @@
9191

9292

9393
# %%
94-
out_path = f"{module_dir}/{glob_pattern.split('/*')[0]}"
94+
out_path = file_paths[0].rsplit("/", 1)[0]
9595
df_m3gnet = df_m3gnet.round(4)
9696
df_m3gnet.select_dtypes("number").to_csv(f"{out_path}.csv.gz")
9797
df_m3gnet.reset_index().to_json(f"{out_path}.json.gz", default_handler=as_dict_handler)
9898

9999

100-
# in_path = f"{module_dir}/2022-10-31-m3gnet-wbm-IS2RE.json.gz"
101-
# df_m3gnet = pd.read_csv(in_path.replace(".json.gz", ".csv")).set_index("material_id")
102-
# df_m3gnet = pd.read_json(in_path).set_index("material_id")
100+
# in_path = f"{module_dir}/2022-10-31-m3gnet-wbm-IS2RE"
101+
# df_m3gnet = pd.read_csv(f"{in_path}.csv.gz").set_index("material_id")
102+
# df_m3gnet = pd.read_json(f"{in_path}.json.gz").set_index("material_id")

models/m3gnet/test_m3gnet.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@
112112
e_pred_col: relax_result["trajectory"].energies[-1],
113113
}
114114
except Exception as exc:
115-
print(f"Failed to relax {material_id}: {exc}")
115+
print(f"Failed to relax {material_id}: {exc!r}")
116116

117117

118118
# %%

models/mace/join_mace_results.py

+114
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
"""Concatenate MACE results from multiple data files generated by slurm job array
into single file.

Steps performed by the cells below:
1. Load every per-task JSON file matching the glob pattern and concatenate them.
2. Copy MACE energies and relaxed structures onto the WBM computed structure
   entries (CSEs) and apply MP2020 energy corrections.
3. Compute corrected formation energies per atom.
4. Write the joined results (CSV with numeric columns, JSON with everything) plus
   a separate CSV of predictions that deviate strongly from the reference.
"""


# %%
from __future__ import annotations

import os
import warnings
from glob import glob

import pandas as pd
from pymatgen.core import Structure
from pymatgen.entries.compatibility import MaterialsProject2020Compatibility
from pymatgen.entries.computed_entries import ComputedStructureEntry
from pymatviz import density_scatter
from tqdm import tqdm

from matbench_discovery.data import DATA_FILES, as_dict_handler, df_wbm
from matbench_discovery.energy import get_e_form_per_atom
from matbench_discovery.preds import e_form_col

__author__ = "Janosh Riebesell"
__date__ = "2023-03-01"

warnings.filterwarnings(action="ignore", category=UserWarning, module="pymatgen")


# %%
module_dir = os.path.dirname(__file__)
task_type = "IS2RE"
date = "2023-08-14"
# one directory per job array run, one JSON file per array task
glob_pattern = f"{date}-mace-wbm-{task_type}*/*.json.gz"
file_paths = sorted(glob(f"{module_dir}/{glob_pattern}"))
print(f"Found {len(file_paths):,} files for {glob_pattern = }")
struct_col = "mace_structure"

# maps file path -> loaded dataframe; kept at module level so re-running the
# loading cell below skips files that were already read
dfs: dict[str, pd.DataFrame] = {}


# %%
for file_path in tqdm(file_paths):
    if file_path in dfs:
        continue
    df = pd.read_json(file_path).set_index("material_id")
    # drop trajectory to save memory
    dfs[file_path] = df.drop(columns="mace_trajectory")

df_mace = pd.concat(dfs.values()).round(4)


# %%
df_cse = pd.read_json(DATA_FILES.wbm_computed_structure_entries).set_index(
    "material_id"
)

entry_col = "computed_structure_entry"
# deserialize the entry dicts into ComputedStructureEntry objects
df_cse[entry_col] = [
    ComputedStructureEntry.from_dict(dct)
    for dct in tqdm(df_cse.computed_structure_entry)
]


# %% transfer mace energies and relaxed structures to WBM CSEs since MP2020 energy
# corrections applied below are structure-dependent (for oxides and sulfides)
cse: ComputedStructureEntry
for row in tqdm(df_mace.itertuples(), total=len(df_mace)):
    # NOTE(review): positional unpacking assumes the first two columns of df_mace
    # are the structure dict and the MACE energy (in that order) - confirm
    mat_id, struct_dict, mace_energy, *_ = row
    mlip_struct = Structure.from_dict(struct_dict)
    df_mace.at[mat_id, struct_col] = mlip_struct  # noqa: PD008
    cse = df_cse.loc[mat_id, entry_col]
    cse._energy = mace_energy  # cse._energy is the uncorrected energy
    cse._structure = mlip_struct
    df_mace.loc[mat_id, entry_col] = cse


# %% apply energy corrections
# the entries live in the entry_col column filled by the loop above; index by
# column name since df_mace has no column called "cse"
out = MaterialsProject2020Compatibility().process_entries(
    df_mace[entry_col], verbose=True, clean=True
)
assert len(out) == len(df_mace)


# %% compute corrected formation energies
e_form_mace_col = "e_form_per_atom_mace"
df_mace["formula"] = df_wbm.formula
df_mace[e_form_mace_col] = [
    get_e_form_per_atom(dict(energy=cse.energy, composition=formula))
    for formula, cse in tqdm(
        df_mace.set_index("formula")[entry_col].items(), total=len(df_mace)
    )
]
# pandas aligns on the material_id index when assigning across dataframes
df_wbm[e_form_mace_col] = df_mace[e_form_mace_col]


# %%
# flag predictions whose absolute deviation from the reference formation energy
# exceeds 10 (same units as e_form_col) and plot them for inspection
bad_mask = (df_wbm[e_form_col] - df_wbm[e_form_mace_col]).abs() > 10
ax = density_scatter(df=df_wbm[bad_mask], x=e_form_col, y=e_form_mace_col)


# %%
# output files are named after the directory containing the first matched file
out_path = file_paths[0].rsplit("/", 1)[0]
df_mace = df_mace.round(4)
# the CSV excludes flagged rows, the JSON keeps all rows
df_mace[~bad_mask].select_dtypes("number").to_csv(f"{out_path}.csv.gz")
df_mace.reset_index().to_json(f"{out_path}.json.gz", default_handler=as_dict_handler)

# flagged rows are saved separately together with their reference values
df_bad = df_mace[bad_mask].drop(columns=[entry_col, struct_col])
df_bad[e_form_col] = df_wbm[e_form_col]
df_bad.to_csv(f"{out_path}-bad.csv")

# in_path = f"{module_dir}/2023-08-14-mace-wbm-IS2RE-FIRE"
# df_mace = pd.read_csv(f"{in_path}.csv.gz").set_index("material_id")
# df_mace = pd.read_json(f"{in_path}.json.gz").set_index("material_id")

models/mace/test_mace.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@
3232
job_name = f"mace-wbm-{task_type}-{ase_optimizer}"
3333
out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
3434
relax_cell = True
35+
# MACE trained on M3GNet training set by original MACE authors
36+
# model_name = "2023-07-14-mace-ilyes-trained-MPF-2021-2-8-big-128-6"
37+
# MACE trained on CHGNet training set by Yuan Chiang
38+
model_name = "2023-08-14-mace-yuan-trained-mptrj-04"
3539

3640
slurm_vars = slurm_submit(
3741
job_name=job_name,
@@ -62,6 +66,7 @@
6266
e_pred_col = "mace_energy"
6367
max_steps = 500
6468
force_max = 0.05 # Run until the forces are smaller than this in eV/A
69+
checkpoint = f"{ROOT}/models/mace/{model_name}.model"
6570

6671
df_in: pd.DataFrame = np.array_split(
6772
pd.read_json(data_path).set_index("material_id"), slurm_array_task_count
@@ -70,6 +75,7 @@
7075
run_params = dict(
7176
data_path=data_path,
7277
versions={dep: version(dep) for dep in ("mace", "numpy", "torch")},
78+
checkpoint=checkpoint,
7379
task_type=task_type,
7480
df=dict(shape=str(df_in.shape), columns=", ".join(df_in)),
7581
slurm_vars=slurm_vars,
@@ -84,8 +90,6 @@
8490

8591

8692
# %%
87-
checkpoint = f"{ROOT}/models/mace/2023-07-14-mace-universal-2-big-128-6.model"
88-
# load MACE model pre-trained on M3GNet training set by original MACE authors
8993
mace_calc = MACECalculator(checkpoint, device="cuda", default_dtype="float32")
9094
relax_results: dict[str, dict[str, Any]] = {}
9195
input_col = {"IS2RE": "initial_structure", "RS2RE": "relaxed_structure"}[task_type]
@@ -131,7 +135,7 @@
131135
"mace_trajectory": mace_traj, # Add the trajectory to the results
132136
}
133137
except Exception as exc:
134-
print(f"Failed to relax {material_id}: {exc}")
138+
print(f"Failed to relax {material_id}: {exc!r}")
135139
continue
136140

137141

models/voronoi/join_voronoi_features.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -39,5 +39,5 @@
3939

4040

4141
# %%
42-
out_path = f"{module_dir}/{glob_pattern.split('-*')[0]}.csv.bz2"
43-
df_features.to_csv(out_path)
42+
out_path = file_paths[0].rsplit("/", 1)[0]
43+
df_features.to_csv(f"{out_path}.csv.bz2")

site/src/lib/References.svelte

+18-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
export let references: Reference[]
66
export let ref_selector: string = `a.ref[href^='#']`
77
export let found_on_page: Reference[] = references
8+
export let n_authors: number = 1
9+
export let first_name_mode: 'initial' | 'full' | 'none' = `none`
810
911
function filter_refs() {
1012
const ref_links = document.querySelectorAll<HTMLAnchorElement>(ref_selector)
@@ -20,10 +22,24 @@
2022
<li>
2123
<strong {id}>{title}</strong>
2224
<span>
23-
{@html author.map((a) => `${a.given} ${a.family}`).join(`, &thinsp; `)}
25+
{@html author
26+
.slice(0, n_authors)
27+
.map((auth) => {
28+
const { given, family } = auth
29+
const first_name = {
30+
initial: `${given[0]}. `,
31+
full: `${given} `,
32+
none: ``,
33+
}[first_name_mode]
34+
return `${first_name ?? ``}${family}`
35+
})
36+
.join(`,&thinsp; `)}
37+
{#if author.length > n_authors}
38+
<em>et al.</em>
39+
{/if}
2440
</span>
25-
&mdash;
2641
<small>
42+
&mdash;
2743
{#if DOI}
2844
<a href="https://doi.org/{DOI}">{DOI}</a>
2945
{:else if href}

site/src/routes/preprint/references.yaml

+42
Original file line numberDiff line numberDiff line change
@@ -1178,6 +1178,48 @@ references:
11781178
URL: https://link.aps.org/doi/10.1103/PhysRevB.99.014104
11791179
volume: '99'
11801180

1181+
- id: draxl_nomad_2018
1182+
abstract: >-
1183+
Data are a crucial raw material of this century. The amount of data that
1184+
have been created in materials science thus far and that continues to be
1185+
created every day is immense. Without a proper infrastructure that allows
1186+
for collecting and sharing data, the envisioned success of big data-driven
1187+
materials science will be hampered. For the field of computational materials
1188+
science, the NOMAD (Novel Materials Discovery) Center of Excellence (CoE)
1189+
has changed the scientific culture toward comprehensive and findable,
1190+
accessible, interoperable, and reusable (FAIR) data, opening new avenues for
1191+
mining materials science big data. Novel data-analytics concepts and tools
1192+
turn data into knowledge and help in the prediction of new materials and in
1193+
the identification of new properties of already known materials.
1194+
accessed:
1195+
- year: 2020
1196+
month: 7
1197+
day: 29
1198+
author:
1199+
- family: Draxl
1200+
given: Claudia
1201+
- family: Scheffler
1202+
given: Matthias
1203+
citation-key: draxl_nomad_2018
1204+
container-title: MRS Bulletin
1205+
DOI: 10.1557/mrs.2018.208
1206+
ISSN: 0883-7694, 1938-1425
1207+
issue: '9'
1208+
issued:
1209+
- year: 2018
1210+
month: 9
1211+
language: en
1212+
note: 'ZSCC: 0000084'
1213+
page: 676-682
1214+
publisher: Cambridge University Press
1215+
source: Cambridge University Press
1216+
title: 'NOMAD: The FAIR concept for big data-driven materials science'
1217+
title-short: NOMAD
1218+
type: article-journal
1219+
URL: >-
1220+
https://www.cambridge.org/core/journals/mrs-bulletin/article/nomad-the-fair-concept-for-big-datadriven-materials-science/1EEF321F62D41997CA16AD367B74C4B0
1221+
volume: '43'
1222+
11811223
- id: dunn_benchmarking_2020
11821224
abstract: >-
11831225
We present a benchmark test suite and an automated machine learning

0 commit comments

Comments
 (0)