Skip to content

Commit 2073300

Browse files
committed
don't hydrate pseudo-relaxed structures in join_{bowsr,m3gnet}_results.py
1 parent f9c348c commit 2073300

13 files changed

+38
-89
lines changed

data/mp/build_phase_diagram.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import json
44
import os
55
import pickle
6-
from datetime import datetime
76

87
import pandas as pd
98
import pymatviz
@@ -12,10 +11,9 @@
1211
from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry
1312
from pymatgen.ext.matproj import MPRester
1413

15-
from matbench_discovery import ROOT
14+
from matbench_discovery import ROOT, today
1615
from matbench_discovery.energy import get_e_form_per_atom, get_elemental_ref_entries
1716

18-
today = f"{datetime.now():%Y-%m-%d}"
1917
module_dir = os.path.dirname(__file__)
2018

2119

data/mp/get_mp_energies.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
# %%
22
import os
3-
from datetime import datetime
43

54
import pandas as pd
65
from aviary.utils import as_dict_handler
76
from aviary.wren.utils import get_aflow_label_from_spglib
87
from mp_api.client import MPRester
98
from tqdm import tqdm
109

10+
from matbench_discovery import today
11+
1112
"""
1213
Download all MP formation energies and energies above the convex hull on 2022-08-13.
1314
@@ -18,7 +19,7 @@
1819
__author__ = "Janosh Riebesell"
1920
__date__ = "2022-08-13"
2021

21-
today = f"{datetime.now():%Y-%m-%d}"
22+
2223
module_dir = os.path.dirname(__file__)
2324

2425

data/wbm/compare_cse_vs_ce_mp_2020_corrections.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import gzip
33
import json
44
import warnings
5-
from datetime import datetime
65

76
import pandas as pd
87
from pymatgen.entries.compatibility import (
@@ -12,7 +11,7 @@
1211
from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry
1312
from tqdm import tqdm
1413

15-
from matbench_discovery import ROOT
14+
from matbench_discovery import ROOT, today
1615
from matbench_discovery.energy import get_e_form_per_atom
1716
from matbench_discovery.plot_scripts import df_wbm
1817
from matbench_discovery.plots import plt
@@ -23,7 +22,6 @@
2322
ComputedStructureEntry, not ComputedEntry when applying corrections.
2423
"""
2524

26-
today = f"{datetime.now():%Y-%m-%d}"
2725

2826
cse_path = f"{ROOT}/data/wbm/2022-10-19-wbm-cses.json.bz2"
2927
df_cse = pd.read_json(cse_path).set_index("material_id")
@@ -101,7 +99,7 @@
10199

102100
ax.axline((0, 0), slope=1, color="gray", linestyle="dashed", zorder=-1)
103101

104-
# ax.figure.savefig(f"{ROOT}/tmp/{today}-ce-vs-cse-corrections-outliers.pdf")
102+
ax.figure.savefig(f"{ROOT}/tmp/{today}-ce-vs-cse-corrections-outliers.pdf")
105103

106104

107105
# %%
@@ -122,7 +120,7 @@
122120
# insight: all materials for which ComputedEntry and ComputedStructureEntry give
123121
# different formation energies are oxides or sulfides for which MP 2020 compat takes
124122
# into account structural information to make more accurate corrections.
125-
# ax.figure.savefig(f"{ROOT}/tmp/{today}-ce-vs-cse-outliers.pdf")
123+
ax.figure.savefig(f"{ROOT}/tmp/{today}-ce-vs-cse-e-form-outliers.pdf")
126124

127125

128126
# %% below code resulted in

data/wbm/fetch_process_wbm_dataset.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import pickle
55
import urllib.request
66
import warnings
7-
from datetime import datetime
87
from glob import glob
98

109
import pandas as pd
@@ -21,7 +20,7 @@
2120
from pymatviz import density_scatter
2221
from tqdm import tqdm
2322

24-
from matbench_discovery import ROOT
23+
from matbench_discovery import ROOT, today
2524
from matbench_discovery.energy import get_e_form_per_atom
2625

2726
try:
@@ -40,7 +39,6 @@
4039

4140

4241
module_dir = os.path.dirname(__file__)
43-
today = f"{datetime.now():%Y-%m-%d}"
4442
warnings.filterwarnings("ignore", category=UserWarning, module="pymatgen")
4543

4644

matbench_discovery/plot_scripts/hist_classified_stable_vs_hull_dist.py

+5-7
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
# %%
2-
from datetime import datetime
3-
42
import pandas as pd
53

6-
from matbench_discovery import ROOT
4+
from matbench_discovery import ROOT, today
75
from matbench_discovery.plot_scripts import df_wbm
86
from matbench_discovery.plots import (
97
StabilityCriterion,
@@ -25,8 +23,6 @@
2523
See fig. S1 in https://science.org/doi/10.1126/sciadv.abn4117.
2624
"""
2725

28-
today = f"{datetime.now():%Y-%m-%d}"
29-
3026

3127
# %%
3228
df = pd.read_csv(
@@ -78,5 +74,7 @@
7874
title=f"Enrichment Factor = {metrics['enrichment']:.3}",
7975
)
8076

81-
fig_name = f"wren-wbm-hull-dist-hist-{which_energy=}-{stability_crit=}"
82-
# fig.savefig(f"{ROOT}/figures/{today}-{fig_name}.pdf")
77+
78+
# %%
79+
fig_name = f"{today}-wren-wbm-hull-dist-hist-{which_energy=}-{stability_crit=}"
80+
# fig.savefig(f"{ROOT}/figures/{fig_name}.pdf")

matbench_discovery/plot_scripts/hist_classified_stable_vs_hull_dist_batches.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
# %%
2-
from datetime import datetime
3-
42
import pandas as pd
53
import pymatviz
64

7-
from matbench_discovery import ROOT
5+
from matbench_discovery import ROOT, today
86
from matbench_discovery.plot_scripts import df_wbm
97
from matbench_discovery.plots import (
108
StabilityCriterion,
@@ -27,8 +25,6 @@
2725
See fig. S1 in https://science.org/doi/10.1126/sciadv.abn4117.
2826
"""
2927

30-
today = f"{datetime.now():%Y-%m-%d}"
31-
3228

3329
# %%
3430
dfs = {}

matbench_discovery/plot_scripts/precision_recall.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,13 @@
11
# %%
2-
from datetime import datetime
3-
4-
import matplotlib.pyplot as plt
52
import pandas as pd
63
from sklearn.metrics import f1_score
74

8-
from matbench_discovery import ROOT
5+
from matbench_discovery import ROOT, today
96
from matbench_discovery.plot_scripts import df_wbm
10-
from matbench_discovery.plots import StabilityCriterion, cumulative_clf_metric
7+
from matbench_discovery.plots import StabilityCriterion, cumulative_clf_metric, plt
118

129
__author__ = "Rhys Goodall, Janosh Riebesell"
1310

14-
today = f"{datetime.now():%Y-%m-%d}"
15-
1611

1712
# %%
1813
dfs: dict[str, pd.DataFrame] = {}
@@ -118,10 +113,11 @@
118113
# x-ticks every 10k materials
119114
# ax.set(xticks=range(0, int(ax.get_xlim()[1]), 10_000))
120115

121-
fig.suptitle(f"{today} ")
116+
fig.suptitle(f"{today} {stability_crit=}")
122117
xlabel_cumulative = "Materials predicted stable sorted by hull distance"
123118
fig.text(0.5, -0.08, xlabel_cumulative, ha="center")
124119

125120

126121
# %%
127-
# fig.savefig(f"{ROOT}/figures/{today}-precision-recall-curves.pdf")
122+
img_path = f"{ROOT}/figures/{today}-precision-recall-curves.pdf"
123+
# fig.savefig(img_path)

matbench_discovery/plot_scripts/rolling_mae_vs_hull_dist.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
11
# %%
2-
from datetime import datetime
3-
42
import pandas as pd
53

6-
from matbench_discovery import ROOT
4+
from matbench_discovery import ROOT, today
75
from matbench_discovery.plot_scripts import df_wbm
86
from matbench_discovery.plots import rolling_mae_vs_hull_dist
97

108
__author__ = "Rhys Goodall, Janosh Riebesell"
119
__date__ = "2022-06-18"
1210

13-
today = f"{datetime.now():%Y-%m-%d}"
14-
1511

1612
# %%
1713
markers = ["o", "v", "^", "H", "D", ""]

matbench_discovery/plot_scripts/rolling_mae_vs_hull_dist_wbm_batches.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
11
# %%
2-
from datetime import datetime
3-
42
import pandas as pd
53

6-
from matbench_discovery import ROOT
4+
from matbench_discovery import ROOT, today
75
from matbench_discovery.plot_scripts import df_wbm
86
from matbench_discovery.plots import plt, rolling_mae_vs_hull_dist
97

108
__author__ = "Rhys Goodall, Janosh Riebesell"
119
__date__ = "2022-06-18"
1210

13-
today = f"{datetime.now():%Y-%m-%d}"
14-
1511

1612
# %%
1713
df_wren = pd.read_csv(
@@ -70,4 +66,4 @@
7066

7167

7268
img_name = f"{today}-{model_name}-rolling-mae-vs-hull-dist-wbm-batches"
73-
fig.savefig(f"{ROOT}/figures/{img_name}.pdf")
69+
# fig.savefig(f"{ROOT}/figures/{img_name}.pdf")

models/bowsr/join_bowsr_results.py

+3-11
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,22 @@
22
from __future__ import annotations
33

44
import os
5-
from datetime import datetime
65
from glob import glob
76

87
import pandas as pd
98
import pymatviz
10-
from pymatgen.core import Structure
119
from tqdm import tqdm
1210

13-
from matbench_discovery import ROOT
11+
from matbench_discovery import ROOT, today
1412

1513
__author__ = "Janosh Riebesell"
1614
__date__ = "2022-09-22"
1715

18-
today = f"{datetime.now():%Y-%m-%d}"
19-
2016

2117
# %%
2218
module_dir = os.path.dirname(__file__)
2319
task_type = "IS2RE"
24-
date = "2022-09-22"
20+
date = "2022-11-22"
2521
glob_pattern = f"{date}-bowsr-megnet-wbm-{task_type}/*.json.gz"
2622
file_paths = sorted(glob(f"{module_dir}/{glob_pattern}"))
2723
print(f"Found {len(file_paths):,} files for {glob_pattern = }")
@@ -35,10 +31,6 @@
3531
continue
3632
df = pd.read_json(file_path).set_index("material_id")
3733

38-
df["bowsr_structure"] = df.structure_bowsr.map(Structure.from_dict)
39-
df["formula"] = df.structure_bowsr.map(lambda x: x.alphabetical_formula)
40-
df["bowsr_volume"] = df.structure_bowsr.map(lambda x: x.volume)
41-
df["n_sites"] = df.structure_bowsr.map(len)
4234
dfs[file_path] = df
4335

4436

@@ -57,7 +49,7 @@
5749
# %%
5850
pymatviz.density_scatter(
5951
x=df_bowsr.e_form_per_atom_bowsr_megnet,
60-
y=df_bowsr.e_form_wbm,
52+
y=df_wbm.loc[df_bowsr.index].e_form_per_atom_mp2020_corrected,
6153
)
6254

6355

models/m3gnet/eda_wbm_pre_vs_post_m3gnet_relaxation.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
# %%
2-
from datetime import datetime
3-
42
import pandas as pd
53
import plotly.express as px
64
from pymatgen.core import Structure
@@ -9,13 +7,12 @@
97
from pymatviz.utils import add_identity_line
108
from sklearn.metrics import r2_score
119

12-
from matbench_discovery import ROOT, plots
10+
from matbench_discovery import ROOT, plots, today
1311

1412
__author__ = "Janosh Riebesell"
1513
__date__ = "2022-06-18"
1614

1715

18-
today = f"{datetime.now():%Y-%m-%d}"
1916
del plots # https://github.com/PyCQA/pyflakes/issues/366
2017

2118

models/m3gnet/join_m3gnet_results.py

+12-26
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,18 @@
22
from __future__ import annotations
33

44
import os
5-
from datetime import datetime
65
from glob import glob
76

87
import pandas as pd
98
from pymatgen.analysis.phase_diagram import PDEntry
10-
from pymatgen.core import Structure
119
from tqdm import tqdm
1210

13-
from matbench_discovery import ROOT, as_dict_handler
11+
from matbench_discovery import ROOT, as_dict_handler, today
1412
from matbench_discovery.energy import get_e_form_per_atom
1513

1614
__author__ = "Janosh Riebesell"
1715
__date__ = "2022-08-16"
1816

19-
today = f"{datetime.now():%Y-%m-%d}"
20-
2117

2218
# %%
2319
module_dir = os.path.dirname(__file__)
@@ -36,26 +32,16 @@
3632
for file_path in tqdm(file_paths):
3733
if file_path in dfs:
3834
continue
39-
try:
40-
# keep whole dataframe in memory
41-
df = pd.read_json(file_path).set_index("material_id")
42-
df.index.name = "material_id"
43-
col_map = dict(
44-
final_structure="m3gnet_structure", trajectory="m3gnet_trajectory"
45-
)
46-
df = df.rename(columns=col_map)
47-
df.reset_index().to_json(file_path)
48-
df[f"m3gnet_energy_{task_type}"] = df.m3gnet_trajectory.map(
49-
lambda x: x["energies"][-1][0]
50-
)
51-
df["m3gnet_structure"] = df.m3gnet_structure.map(Structure.from_dict)
52-
df["formula"] = df.m3gnet_structure.map(lambda x: x.alphabetical_formula)
53-
df["m3gnet_volume"] = df.m3gnet_structure.map(lambda x: x.volume)
54-
df["n_sites"] = df.m3gnet_structure.map(len)
55-
# drop trajectory to save memory
56-
dfs[file_path] = df.drop(columns=["m3gnet_trajectory"])
57-
except FileNotFoundError:
58-
continue
35+
df = pd.read_json(file_path).set_index("material_id")
36+
df.index.name = "material_id"
37+
col_map = dict(final_structure="structure_m3gnet", trajectory="m3gnet_trajectory")
38+
df = df.rename(columns=col_map)
39+
df.reset_index().to_json(file_path)
40+
df[f"m3gnet_energy_{task_type}"] = df.m3gnet_trajectory.map(
41+
lambda x: x["energies"][-1][0]
42+
)
43+
# drop trajectory to save memory
44+
dfs[file_path] = df.drop(columns=["m3gnet_trajectory"])
5945

6046

6147
# %%
@@ -64,7 +50,7 @@
6450

6551
# %%
6652
df_m3gnet["e_form_per_atom_m3gnet"] = [
67-
get_e_form_per_atom(PDEntry(row.m3gnet_structure.composition, row.m3gnet_energy))
53+
get_e_form_per_atom(PDEntry(row.structure_m3gnet.composition, row.m3gnet_energy))
6854
for row in tqdm(df_m3gnet.itertuples(), total=len(df_m3gnet), disable=None)
6955
]
7056
df_m3gnet.isna().sum()

0 commit comments

Comments
 (0)