|
| 1 | +# %% |
| 2 | +import gzip |
| 3 | +import json |
1 | 4 | import warnings
|
2 | 5 | from datetime import datetime
|
3 | 6 |
|
|
12 | 15 | from matbench_discovery import ROOT
|
13 | 16 | from matbench_discovery.energy import get_e_form_per_atom
|
14 | 17 | from matbench_discovery.plot_scripts import df_wbm
|
| 18 | +from matbench_discovery.plots import plt |
15 | 19 |
|
16 | 20 | """
|
17 | 21 | NOTE MaterialsProject2020Compatibility takes structural information into account when
|
|
47 | 51 | get_e_form_per_atom(entry) for entry in tqdm(cses)
|
48 | 52 | ]
|
49 | 53 |
|
# Store the per-atom energy correction carried by every ComputedStructureEntry (cses)
# and every ComputedEntry (ces) as separate df_wbm columns so they can be compared.
for col_name, entries in (
    ("mp2020_cse_correction_per_atom", cses),
    ("mp2020_ce_correction_per_atom", ces),
):
    df_wbm[col_name] = [entry.correction_per_atom for entry in tqdm(entries)]
52 | 58 |
|
53 | 59 |
|
54 | 60 | # %%
|
|
81 | 87 |
|
82 | 88 |
|
# %%
# Scatter the per-atom correction of each CSE against that of the matching CE for the
# rows in df_ce_ne_cse, one color per anion group, all drawn onto the same axes.
ax = plt.gca()

# title and color lookup do not depend on the anion group, so build them once
anion_colors = dict(oxide="orange", sulfide="teal")
n_outliers, n_total = len(df_ce_ne_cse), len(df_wbm)
plot_title = (
    f"CSE vs CE corrections for ({n_outliers:,} / {n_total:,} = "
    f"{n_outliers / n_total:.1%})\n outliers of largest difference"
)

for key, df_anion in df_ce_ne_cse.groupby("anion"):
    ax = df_anion.plot.scatter(
        ax=ax,
        x="mp2020_cse_correction_per_atom",
        y="mp2020_ce_correction_per_atom",
        label=f"{key} ({len(df_anion):,})",
        color=anion_colors.get(key, "blue"),  # groups without a dedicated color are blue
        title=plot_title,
    )

# dashed y = x reference line: points on it have identical CSE and CE corrections
ax.axline((0, 0), slope=1, color="gray", linestyle="dashed", zorder=-1)
|
96 | 103 |
|
| 104 | +# ax.figure.savefig(f"{ROOT}/tmp/{today}-ce-vs-cse-corrections-outliers.pdf") |
| 105 | + |
97 | 106 |
|
98 | 107 | # %%
|
| 108 | +ax = plt.gca() |
99 | 109 | for key, df_anion in df_ce_ne_cse.groupby("anion"):
|
100 | 110 | ax = df_anion.plot.scatter(
|
101 | 111 | ax=locals().get("ax"),
|
|
113 | 123 | # different formation energies are oxides or sulfides for which MP 2020 compat takes
|
114 | 124 | # into account structural information to make more accurate corrections.
|
115 | 125 | # ax.figure.savefig(f"{ROOT}/tmp/{today}-ce-vs-cse-outliers.pdf")
|
| 126 | + |
| 127 | + |
# %% below code resulted in
# https://github.com/materialsproject/pymatgen/issues/2730
# Take the row of df_ce_ne_cse whose CSE-derived formation energy exceeds its CE-derived
# one by the largest margin, round-trip that entry through a gzipped JSON file, then
# print the corrections that MaterialsProject2020Compatibility and the legacy
# MaterialsProjectCompatibility assign to its ComputedStructureEntry (CSE) and
# ComputedEntry (CE) versions.
wbm_step_2_34803 = (
    df_ce_ne_cse.e_form_per_atom_mp2020_from_cse
    - df_ce_ne_cse.e_form_per_atom_mp2020_from_ce
).idxmax()
idx = df_wbm.index.get_loc(wbm_step_2_34803)  # position of that entry in cses

# NOTE(review): the payload is a gzip stream even though the file name ends in ".zip";
# the name is kept as is so existing copies of the file remain readable by this cell.
cse_json_path = f"{ROOT}/tmp/cse-wbm-step-2-34803.json.zip"

with gzip.open(cse_json_path, "w") as f:
    f.write(cses[idx].copy().to_json().encode("utf-8"))

with gzip.open(cse_json_path) as f:
    cse = ComputedStructureEntry.from_dict(json.load(f))

# every compatibility scheme gets its own object built from the reloaded entry;
# the CE versions are created from the CSE's dict representation
cse_mp2020 = cse.copy()
cse_legacy = cse.copy()
ce_mp2020 = ComputedEntry.from_dict(cse.to_dict())
ce_legacy = ce_mp2020.copy()


MaterialsProject2020Compatibility().process_entry(cse_mp2020)
MaterialsProject2020Compatibility().process_entry(ce_mp2020)
MaterialsProjectCompatibility().process_entry(cse_legacy)
MaterialsProjectCompatibility().process_entry(ce_legacy)

print(f"{cse_mp2020.correction=:.4}")
print(f"{ce_mp2020.correction=:.4}")
print(f"{cse_legacy.correction=:.4}")
print(f"{ce_legacy.correction=:.4}")

print(f"{cse_mp2020.energy_adjustments=}\n")
print(f"{ce_mp2020.energy_adjustments=}\n")
print(f"{cse_legacy.energy_adjustments=}\n")
print(f"{ce_legacy.energy_adjustments=}\n")
0 commit comments