|
21 | 21 | from matbench_discovery import today
|
22 | 22 | from matbench_discovery.data import DATA_FILES, as_dict_handler
|
23 | 23 | from matbench_discovery.energy import get_e_form_per_atom
|
24 |
| -from matbench_discovery.preds import df_wbm as df_summary |
25 |
| -from matbench_discovery.preds import e_form_col |
| 24 | +from matbench_discovery.preds import df_wbm, e_form_col |
26 | 25 |
|
27 | 26 | __author__ = "Janosh Riebesell"
|
28 | 27 | __date__ = "2023-03-01"
|
|
55 | 54 |
|
56 | 55 |
|
57 | 56 | # %%
|
58 |
| -df_wbm = pd.read_json(DATA_FILES.wbm_computed_structure_entries).set_index( |
| 57 | +df_cse = pd.read_json(DATA_FILES.wbm_computed_structure_entries).set_index( |
59 | 58 | "material_id"
|
60 | 59 | )
|
61 | 60 |
|
62 |
| -df_wbm["cse"] = [ |
63 |
| - ComputedStructureEntry.from_dict(x) for x in tqdm(df_wbm.computed_structure_entry) |
| 61 | +df_cse["cse"] = [ |
| 62 | + ComputedStructureEntry.from_dict(x) for x in tqdm(df_cse.computed_structure_entry) |
64 | 63 | ]
|
65 | 64 |
|
66 | 65 |
|
67 |
| -# %% transfer chgnet energies and relaxed structures WBM CSEs |
| 66 | +# %% transfer CHGNet energies and relaxed structures to WBM CSEs since MP2020 |
| 67 | +# energy corrections applied below are structure-dependent (for oxides and sulfides) |
68 | 68 | cse: ComputedStructureEntry
|
69 | 69 | for row in tqdm(df_chgnet.itertuples(), total=len(df_chgnet)):
|
70 | 70 | mat_id, struct_dict, chgnet_energy, *_ = row
|
71 | 71 | chgnet_struct = Structure.from_dict(struct_dict)
|
72 |
| - cse = df_wbm.loc[mat_id, "cse"] |
| 72 | + cse = df_cse.loc[mat_id, "cse"] |
73 | 73 | cse._energy = chgnet_energy # cse._energy is the uncorrected energy
|
74 | 74 | cse._structure = chgnet_struct
|
75 | 75 | df_chgnet.loc[mat_id, "cse"] = cse
|
76 | 76 |
|
77 | 77 |
|
78 |
| -# %% |
79 |
| -df_chgnet["e_form_per_atom_chgnet_uncorrected"] = [ |
80 |
| - get_e_form_per_atom(cse) for cse in tqdm(df_chgnet.cse) |
81 |
| -] |
82 |
| - |
83 |
| - |
84 |
| -# %% apply energy corrections |
| 78 | +# %% apply MP2020 energy corrections to CSEs holding CHGNet energies and structures |
85 | 79 | out = MaterialsProject2020Compatibility().process_entries(
|
86 | 80 | df_chgnet.cse, verbose=True, clean=True
|
87 | 81 | )
|
88 | 82 | assert len(out) == len(df_chgnet)
|
89 | 83 |
|
90 | 84 |
|
91 | 85 | # %% compute corrected formation energies
|
92 |
| -df_chgnet["e_form_per_atom_chgnet"] = [ |
93 |
| - get_e_form_per_atom(cse) for cse in tqdm(df_chgnet.cse) |
94 |
| -] |
95 |
| - |
96 |
| -df_chgnet[e_form_col] = df_summary[e_form_col] |
| 86 | +e_form_chgnet_col = "e_form_per_atom_chgnet" |
| 87 | +df_chgnet[e_form_chgnet_col] = [get_e_form_per_atom(cse) for cse in tqdm(df_chgnet.cse)] |
97 | 88 |
|
98 | 89 |
|
99 | 90 | # %%
|
100 |
| -ax = density_scatter( |
101 |
| - df=df_chgnet, x="e_form_per_atom_chgnet", y="e_form_per_atom_chgnet_uncorrected" |
102 |
| -) |
103 |
| -ax = density_scatter(df=df_chgnet, x="e_form_per_atom_chgnet", y=e_form_col) |
| 91 | +ax = density_scatter(x=df_wbm[e_form_col], y=df_chgnet[e_form_chgnet_col]) |
104 | 92 |
|
105 | 93 |
|
106 | 94 | # %% load 2019 MEGNet formation energy model
|
|
109 | 97 |
|
110 | 98 |
|
111 | 99 | # %% predict formation energies on CHGNet-relaxed structures with MEGNet
|
112 |
| -for material_id, cse in tqdm(df_wbm.cse.items(), total=len(df_wbm)): |
| 100 | +for material_id, cse in tqdm(df_cse.cse.items(), total=len(df_cse)): |
113 | 101 | if material_id in megnet_e_form_preds:
|
114 | 102 | continue
|
115 | 103 | try:
|
|
119 | 107 | except Exception as exc:
|
120 | 108 | print(f"Failed to predict {material_id=}: {exc}")
|
121 | 109 |
|
122 |
| -df_chgnet["e_form_per_atom_chgnet_megnet"] = pd.Series(megnet_e_form_preds) |
| 110 | +e_form_megnet_col = "e_form_per_atom_chgnet_megnet" |
| 111 | +# remove legacy MP corrections that MEGNet was trained on and apply newer MP2020 |
| 112 | +# corrections instead |
| 113 | +df_chgnet[e_form_megnet_col] = ( |
| 114 | + pd.Series(megnet_e_form_preds) |
| 115 | + - df_wbm.e_correction_per_atom_mp_legacy |
| 116 | + + df_wbm.e_correction_per_atom_mp2020 |
| 117 | +) |
123 | 118 |
|
124 | 119 | assert (
|
125 | 120 | n_isna := df_chgnet.e_form_per_atom_chgnet_megnet.isna().sum()
|
126 | 121 | ) < 10, f"{n_isna=}, expected 7 or similar"
|
127 | 122 |
|
128 | 123 |
|
129 | 124 | # %%
|
130 |
| -ax = density_scatter( |
131 |
| - df=df_chgnet, x="e_form_per_atom_chgnet_megnet", y="e_form_per_atom_chgnet" |
132 |
| -) |
| 125 | +ax = density_scatter(df=df_chgnet, x=e_form_chgnet_col, y=e_form_megnet_col) |
| 126 | +ax = density_scatter(df=df_chgnet, x=e_form_col, y=e_form_megnet_col) |
133 | 127 |
|
134 | 128 |
|
135 | 129 | # %%
|
|
0 commit comments