Skip to content

Commit 73655f6

Browse files
committed
replace figshare wbm-steps-summary.csv imports with data/wbm/2022-10-19-wbm-summary.csv
1 parent 5d9a3b2 commit 73655f6

File tree

4 files changed

+10
-15
lines changed

4 files changed

+10
-15
lines changed

data/wbm/readme.md

+1-3
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,7 @@ Source: [Predicting stable crystalline compounds using chemical similarity](http
88
Load with
99

1010
```py
11-
df_wbm_summary = pd.read_csv( # download wbm-steps-summary.csv (23.31 MB)
12-
"https://figshare.com/files/37570234?private_link=ff0ad14505f9624f0c05"
13-
).set_index("material_id")
11+
df_wbm_summary = pd.read_csv("data/wbm/2022-10-19-wbm-summary.csv").set_index("material_id")
1412
```
1513

1614
## Comprehensive Link Collection for WBM dataset

models/bowsr/join_bowsr_results.py

+4-5
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
from pymatgen.core import Structure
1111
from tqdm import tqdm
1212

13-
from matbench_discovery import ROOT, as_dict_handler
13+
from matbench_discovery import ROOT
1414

1515
__author__ = "Janosh Riebesell"
1616
__date__ = "2022-09-22"
@@ -48,9 +48,8 @@
4848

4949

5050
# %% compare against WBM formation energy targets to make sure we got sensible results
51-
df_wbm = pd.read_csv( # download wbm-steps-summary.csv (23.31 MB)
52-
"https://figshare.com/files/37570234?private_link=ff0ad14505f9624f0c05"
53-
).set_index("material_id")
51+
data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-summary.csv"
52+
df_wbm = pd.read_csv(data_path).set_index("material_id")
5453

5554
df_bowsr["e_form_wbm"] = df_wbm.e_form_per_atom
5655

@@ -71,7 +70,7 @@
7170

7271
# %%
7372
out_path = f"{ROOT}/models/bowsr/{today}-bowsr-megnet-wbm-{task_type}.json.gz"
74-
df_bowsr.reset_index().to_json(out_path, default_handler=as_dict_handler)
73+
df_bowsr.reset_index().to_json(out_path, default_handler=lambda x: x.as_dict())
7574

7675
# out_path = f"{ROOT}/models/bowsr/2022-08-16-bowsr-megnet-wbm-IS2RE.json.gz"
7776
# df_bowsr = pd.read_json(out_path).set_index("material_id")

models/cgcnn/use_cgcnn_ensemble.py

+3-5
Original file line number | Diff line number | Diff line change
@@ -49,12 +49,10 @@
4949
df = df.dropna() # two missing initial structures
5050
assert len(df) == old_len - 2
5151

52-
assert all(
53-
df.index == df_wbm.drop(index=no_init_structs).index
54-
), "df and df_wbm must have same index"
55-
df["e_form_per_atom_mp2020_corrected"] = df_wbm.e_form_per_atom_mp2020_corrected
52+
assert all(df.index == df_wbm.drop(index=no_init_structs).index)
5653

5754
target_col = "e_form_per_atom_mp2020_corrected"
55+
df[target_col] = df_wbm[target_col]
5856
input_col = "initial_structure"
5957
assert target_col in df, f"{target_col=} not in {list(df)}"
6058
assert input_col in df, f"{input_col=} not in {list(df)}"
@@ -84,4 +82,4 @@
8482
data_loader=data_loader,
8583
)
8684

87-
df.round(6).to_csv(f"{module_dir}/{today}-{run_name}-preds.csv")
85+
df.round(6).to_csv(f"{module_dir}/{today}-{run_name}-preds.csv", index=False)

models/wrenformer/mp/use_wrenformer_ensemble.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
from aviary.wrenformer.data import df_to_in_mem_dataloader
1111
from aviary.wrenformer.model import Wrenformer
1212

13+
from matbench_discovery import ROOT
1314
from matbench_discovery.slurm import slurm_submit_python
1415

1516
__author__ = "Janosh Riebesell"
@@ -37,8 +38,7 @@
3738

3839

3940
# %%
40-
# download wbm-steps-summary.csv (23.31 MB)
41-
data_path = "https://figshare.com/files/37570234?private_link=ff0ad14505f9624f0c05"
41+
data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-summary.csv"
4242
df = pd.read_csv(data_path).set_index("material_id")
4343

4444
target_col = "e_form_per_atom"

0 commit comments

Comments
 (0)