|
10 | 10 | from aviary.wren.utils import get_aflow_label_from_spglib
|
11 | 11 | from pymatgen.analysis.phase_diagram import PatchedPhaseDiagram
|
12 | 12 | from pymatgen.core import Composition, Structure
|
13 |
| -from pymatgen.entries.compatibility import MaterialsProject2020Compatibility |
| 13 | +from pymatgen.entries.compatibility import ( |
| 14 | + MaterialsProject2020Compatibility, |
| 15 | + MaterialsProjectCompatibility, |
| 16 | +) |
14 | 17 | from pymatgen.entries.computed_entries import ComputedStructureEntry
|
15 | 18 | from pymatviz import density_scatter
|
16 | 19 | from pymatviz.utils import save_fig
|
@@ -184,7 +187,6 @@ def increment_wbm_material_id(wbm_id: str) -> str:
|
184 | 187 | cse_step_paths = sorted(glob(f"{module_dir}/raw/wbm-cse-step-*.json.bz2"))
|
185 | 188 | assert len(cse_step_paths) == 5
|
186 | 189 |
|
187 |
| - |
188 | 190 | """
|
189 | 191 | There is a discrepancy of 6 entries between the files on Materials Cloud containing the
|
190 | 192 | ComputedStructureEntries (CSE) and those on Google Drive containing initial+relaxed
|
@@ -496,10 +498,25 @@ def fix_bad_struct_index_mismatch(material_id: str) -> str:
|
496 | 498 | assert all(df_summary.n_sites == [len(cse.structure) for cse in df_wbm.cse])
|
497 | 499 |
|
498 | 500 |
|
499 |
| -compat_out = MaterialsProject2020Compatibility().process_entries( |
500 |
| - entries=df_wbm.cse, clean=True, verbose=True |
| 501 | +# entries are corrected in-place by default, so we apply the legacy corrections first |
| 502 | +# and then leave the new corrections in place below. |
| 503 | +# Having both old and new corrections allows updating predictions from older models |
| 504 | +# like MEGNet, which were trained on an MP release prior to the new corrections, by |
| 505 | +# subtracting the old corrections and adding the new ones |
| 506 | +entries_old_corr = MaterialsProjectCompatibility().process_entries( |
| 507 | + df_wbm.cse, clean=True, verbose=True |
| 508 | +) |
| 509 | +assert len(entries_old_corr) == len(df_wbm), f"{len(entries_old_corr)=} {len(df_wbm)=}" |
| 510 | + |
| 511 | +# extract legacy MP energy corrections into a df_wbm column |
| 512 | +e_correction_col = "e_correction_per_atom_mp_legacy" |
| 513 | +df_wbm[e_correction_col] = [cse.correction_per_atom for cse in df_wbm.cse] |
| 514 | + |
| 515 | +# clean up legacy corrections and apply new corrections |
| 516 | +entries_new_corr = MaterialsProject2020Compatibility().process_entries( |
| 517 | + df_wbm.cse, clean=True, verbose=True |
501 | 518 | )
|
502 |
| -assert len(compat_out) == len(df_wbm) == len(df_summary) |
| 519 | +assert len(entries_new_corr) == len(df_wbm), f"{len(entries_new_corr)=} {len(df_wbm)=}" |
503 | 520 |
|
504 | 521 | n_corrected = sum(cse.uncorrected_energy != cse.energy for cse in df_wbm.cse)
|
505 | 522 | assert n_corrected == 100_930, f"{n_corrected=} expected 100,930"
|
|
0 commit comments