Skip to content

Commit 9a41f7b

Browse files
committed
rename data files -all-mp-.../-mp-all-... to just -mp-
1 parent f5e755e commit 9a41f7b

File tree

4 files changed

+17
-21
lines changed

4 files changed

+17
-21
lines changed

matbench_discovery/build_phase_diagram.py

+13-15
Original file line number | Diff line number | Diff line change
@@ -26,25 +26,23 @@
2626
pd.Series(
2727
{e.entry_id: e for e in all_mp_computed_structure_entries}
2828
).drop_duplicates().to_json( # mp-15590 appears twice so we drop_duplicates()
29-
f"{ROOT}/data/{today}-all-mp-entries.json.gz", default_handler=lambda x: x.as_dict()
29+
f"{ROOT}/data/{today}-mp-computed-structure-entries.json.gz",
30+
default_handler=lambda x: x.as_dict(),
3031
)
3132

3233

3334
# %%
34-
all_mp_computed_entries = (
35-
pd.read_json(f"{ROOT}/data/2022-09-16-all-mp-entries.json.gz")
36-
.set_index("material_id")
37-
.entry.map(ComputedEntry.from_dict) # drop the structure, just load ComputedEntry
38-
.to_dict()
39-
)
40-
35+
data_path = f"{ROOT}/data/2022-09-16-mp-computed-structure-entries.json.gz"
36+
df = pd.read_json(data_path).set_index("material_id")
37+
# drop the structure, just load ComputedEntry
38+
mp_computed_entries = df.entry.map(ComputedEntry.from_dict).to_dict()
4139

42-
print(f"{len(all_mp_computed_entries) = :,}")
43-
# len(all_mp_computed_entries) = 146,323
40+
print(f"{len(mp_computed_entries) = :,}")
41+
# len(mp_computed_entries) = 146,323
4442

4543

4644
# %% build phase diagram with MP entries only
47-
ppd_mp = PatchedPhaseDiagram(all_mp_computed_entries)
45+
ppd_mp = PatchedPhaseDiagram(mp_computed_entries)
4846
# prints:
4947
# PatchedPhaseDiagram covering 44805 sub-spaces
5048

@@ -76,7 +74,7 @@
7674

7775
# %% merge MP and WBM entries into a single PatchedPhaseDiagram
7876
mp_wbm_ppd = PatchedPhaseDiagram(
79-
wbm_computed_entries + all_mp_computed_entries, verbose=True
77+
wbm_computed_entries + mp_computed_entries, verbose=True
8078
)
8179

8280
# save MP+WBM PPD to disk (not run)
@@ -86,21 +84,21 @@
8684

8785
# %% compute terminal reference entries across all MP (can be used to compute MP
8886
# compatible formation energies quickly)
89-
elemental_ref_entries = get_elemental_ref_entries(all_mp_computed_entries)
87+
elemental_ref_entries = get_elemental_ref_entries(mp_computed_entries)
9088

9189
# save elemental_ref_entries to disk as json
9290
with open(f"{module_dir}/{today}-elemental-ref-entries.json", "w") as file:
9391
json.dump(elemental_ref_entries, file, default=lambda x: x.as_dict())
9492

9593

96-
df_mp = pd.read_json(f"{ROOT}/data/2022-08-13-mp-all-energies.json.gz").set_index(
94+
df_mp = pd.read_json(f"{ROOT}/data/2022-08-13-mp-energies.json.gz").set_index(
9795
"material_id"
9896
)
9997

10098

10199
# %%
102100
df_mp["our_mp_e_form"] = [
103-
get_e_form_per_atom(all_mp_computed_entries[mp_id]) for mp_id in df_mp.index
101+
get_e_form_per_atom(mp_computed_entries[mp_id]) for mp_id in df_mp.index
104102
]
105103

106104

models/voronoi/featurize_mp_wbm.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@
4444

4545

4646
# %%
47-
data_path = f"{ROOT}/data/2022-09-16-all-mp-entries.json.gz"
47+
data_path = f"{ROOT}/data/2022-09-16-mp-computed-structure-entries.json.gz"
4848
# data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-cses+init-structs.json.bz2"
4949
df = pd.read_json(data_path).set_index("material_id")
5050

models/wrenformer/mp/get_mp_energies.py

+2-4
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,6 @@
4848

4949
df["wyckoff"] = [get_aflow_label_from_spglib(x) for x in tqdm(df.structure)]
5050

51-
df.to_json(
52-
f"{ROOT}/data/{today}-mp-all-energies.json.gz", default_handler=as_dict_handler
53-
)
51+
df.to_json(f"{ROOT}/data/{today}-mp-energies.json.gz", default_handler=as_dict_handler)
5452

55-
# df = pd.read_json(f"{ROOT}/data/2022-08-13-mp-all-energies.json.gz")
53+
# df = pd.read_json(f"{ROOT}/data/2022-08-13-mp-energies.json.gz")

models/wrenformer/slurm_array_wrenformer.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717

1818

1919
# %%
20-
df_or_path = f"{ROOT}/data/2022-08-13-mp-all-energies.json.gz"
20+
df_or_path = f"{ROOT}/data/2022-08-13-mp-energies.json.gz"
2121
target_col = "energy_per_atom"
2222
# df_or_path = f"{ROOT}/data/2022-08-25-m3gnet-trainset-mp-2021-struct-energy.json.gz"
2323
# target_col = "mp_energy_per_atom"

0 commit comments

Comments
 (0)