File tree 4 files changed +10
-15
lines changed
4 files changed +10
-15
lines changed Original file line number Diff line number Diff line change @@ -8,9 +8,7 @@ Source: [Predicting stable crystalline compounds using chemical similarity](http
8
8
Load with
9
9
10
10
``` py
11
- df_wbm_summary = pd.read_csv( # download wbm-steps-summary.csv (23.31 MB)
12
- " https://figshare.com/files/37570234?private_link=ff0ad14505f9624f0c05"
13
- ).set_index(" material_id" )
11
+ df_wbm_summary = pd.read_csv("data/wbm/2022-10-19-wbm-summary.csv").set_index("material_id")
14
12
```
15
13
16
14
## Comprehensive Link Collection for WBM dataset
Original file line number Diff line number Diff line change 10
10
from pymatgen .core import Structure
11
11
from tqdm import tqdm
12
12
13
- from matbench_discovery import ROOT , as_dict_handler
13
+ from matbench_discovery import ROOT
14
14
15
15
__author__ = "Janosh Riebesell"
16
16
__date__ = "2022-09-22"
48
48
49
49
50
50
# %% compare against WBM formation energy targets to make sure we got sensible results
51
- df_wbm = pd .read_csv ( # download wbm-steps-summary.csv (23.31 MB)
52
- "https://figshare.com/files/37570234?private_link=ff0ad14505f9624f0c05"
53
- ).set_index ("material_id" )
51
+ data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-summary.csv"
52
+ df_wbm = pd .read_csv (data_path ).set_index ("material_id" )
54
53
55
54
df_bowsr ["e_form_wbm" ] = df_wbm .e_form_per_atom
56
55
71
70
72
71
# %%
73
72
out_path = f"{ROOT}/models/bowsr/{today}-bowsr-megnet-wbm-{task_type}.json.gz"
74
- df_bowsr .reset_index ().to_json (out_path , default_handler = as_dict_handler )
73
+ df_bowsr .reset_index ().to_json (out_path , default_handler = lambda x : x . as_dict () )
75
74
76
75
# out_path = f"{ROOT}/models/bowsr/2022-08-16-bowsr-megnet-wbm-IS2RE.json.gz"
77
76
# df_bowsr = pd.read_json(out_path).set_index("material_id")
Original file line number Diff line number Diff line change 49
49
df = df .dropna () # two missing initial structures
50
50
assert len (df ) == old_len - 2
51
51
52
- assert all (
53
- df .index == df_wbm .drop (index = no_init_structs ).index
54
- ), "df and df_wbm must have same index"
55
- df ["e_form_per_atom_mp2020_corrected" ] = df_wbm .e_form_per_atom_mp2020_corrected
52
+ assert all (df .index == df_wbm .drop (index = no_init_structs ).index )
56
53
57
54
target_col = "e_form_per_atom_mp2020_corrected"
55
+ df [target_col ] = df_wbm [target_col ]
58
56
input_col = "initial_structure"
59
57
assert target_col in df , f"{ target_col = } not in { list (df )} "
60
58
assert input_col in df , f"{ input_col = } not in { list (df )} "
84
82
data_loader = data_loader ,
85
83
)
86
84
87
- df .round (6 ).to_csv (f"{ module_dir } /{ today } -{ run_name } -preds.csv" )
85
+ df.round(6).to_csv(f"{module_dir}/{today}-{run_name}-preds.csv", index=False)
Original file line number Diff line number Diff line change 10
10
from aviary .wrenformer .data import df_to_in_mem_dataloader
11
11
from aviary .wrenformer .model import Wrenformer
12
12
13
+ from matbench_discovery import ROOT
13
14
from matbench_discovery .slurm import slurm_submit_python
14
15
15
16
__author__ = "Janosh Riebesell"
37
38
38
39
39
40
# %%
40
- # download wbm-steps-summary.csv (23.31 MB)
41
- data_path = "https://figshare.com/files/37570234?private_link=ff0ad14505f9624f0c05"
41
+ data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-summary.csv"
42
42
df = pd .read_csv (data_path ).set_index ("material_id" )
43
43
44
44
target_col = "e_form_per_atom"
You can’t perform that action at this time.
0 commit comments