|
2 | 2 | from __future__ import annotations
|
3 | 3 |
|
4 | 4 | import os
|
5 |
| -from datetime import datetime |
6 | 5 | from glob import glob
|
7 | 6 |
|
8 | 7 | import pandas as pd
|
9 | 8 | from pymatgen.analysis.phase_diagram import PDEntry
|
10 |
| -from pymatgen.core import Structure |
11 | 9 | from tqdm import tqdm
|
12 | 10 |
|
13 |
| -from matbench_discovery import ROOT, as_dict_handler |
| 11 | +from matbench_discovery import ROOT, as_dict_handler, today |
14 | 12 | from matbench_discovery.energy import get_e_form_per_atom
|
15 | 13 |
|
16 | 14 | __author__ = "Janosh Riebesell"
|
17 | 15 | __date__ = "2022-08-16"
|
18 | 16 |
|
19 |
| -today = f"{datetime.now():%Y-%m-%d}" |
20 |
| - |
21 | 17 |
|
22 | 18 | # %%
|
23 | 19 | module_dir = os.path.dirname(__file__)
|
|
36 | 32 | for file_path in tqdm(file_paths):
|
37 | 33 | if file_path in dfs:
|
38 | 34 | continue
|
39 |
| - try: |
40 |
| - # keep whole dataframe in memory |
41 |
| - df = pd.read_json(file_path).set_index("material_id") |
42 |
| - df.index.name = "material_id" |
43 |
| - col_map = dict( |
44 |
| - final_structure="m3gnet_structure", trajectory="m3gnet_trajectory" |
45 |
| - ) |
46 |
| - df = df.rename(columns=col_map) |
47 |
| - df.reset_index().to_json(file_path) |
48 |
| - df[f"m3gnet_energy_{task_type}"] = df.m3gnet_trajectory.map( |
49 |
| - lambda x: x["energies"][-1][0] |
50 |
| - ) |
51 |
| - df["m3gnet_structure"] = df.m3gnet_structure.map(Structure.from_dict) |
52 |
| - df["formula"] = df.m3gnet_structure.map(lambda x: x.alphabetical_formula) |
53 |
| - df["m3gnet_volume"] = df.m3gnet_structure.map(lambda x: x.volume) |
54 |
| - df["n_sites"] = df.m3gnet_structure.map(len) |
55 |
| - # drop trajectory to save memory |
56 |
| - dfs[file_path] = df.drop(columns=["m3gnet_trajectory"]) |
57 |
| - except FileNotFoundError: |
58 |
| - continue |
| 35 | + df = pd.read_json(file_path).set_index("material_id") |
| 36 | + df.index.name = "material_id" |
| 37 | + col_map = dict(final_structure="structure_m3gnet", trajectory="m3gnet_trajectory") |
| 38 | + df = df.rename(columns=col_map) |
| 39 | + df.reset_index().to_json(file_path) |
| 40 | + df[f"m3gnet_energy_{task_type}"] = df.m3gnet_trajectory.map( |
| 41 | + lambda x: x["energies"][-1][0] |
| 42 | + ) |
| 43 | + # drop trajectory to save memory |
| 44 | + dfs[file_path] = df.drop(columns=["m3gnet_trajectory"]) |
59 | 45 |
|
60 | 46 |
|
61 | 47 | # %%
|
|
64 | 50 |
|
65 | 51 | # %%
|
66 | 52 | df_m3gnet["e_form_per_atom_m3gnet"] = [
|
67 |
| - get_e_form_per_atom(PDEntry(row.m3gnet_structure.composition, row.m3gnet_energy)) |
| 53 | + get_e_form_per_atom(PDEntry(row.structure_m3gnet.composition, row.m3gnet_energy)) |
68 | 54 | for row in tqdm(df_m3gnet.itertuples(), total=len(df_m3gnet), disable=None)
|
69 | 55 | ]
|
70 | 56 | df_m3gnet.isna().sum()
|
|
0 commit comments