Skip to content

Commit 6932567

Browse files
committed
add models/voronoi/featurize_mp_wbm.py
1 parent 86f85f3 commit 6932567

File tree

2 files changed

+61
-1
lines changed

2 files changed

+61
-1
lines changed

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,5 @@ models/**/*.csv
2525
# temporary ignore rule
2626
paper
2727
meeting-notes
28-
models/voronoi
28+
models/voronoi/*
29+
!models/voronoi/*.py

models/voronoi/featurize_mp_wbm.py

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# %%
2+
import os
3+
from datetime import datetime
4+
5+
import pandas as pd
6+
from matminer.featurizers.base import MultipleFeaturizer
7+
from matminer.featurizers.composition import (
8+
ElementProperty,
9+
IonProperty,
10+
Stoichiometry,
11+
ValenceOrbital,
12+
)
13+
from matminer.featurizers.structure import (
14+
ChemicalOrdering,
15+
MaximumPackingEfficiency,
16+
SiteStatsFingerprint,
17+
StructuralHeterogeneity,
18+
StructureComposition,
19+
)
20+
from pymatgen.core import Structure
21+
from tqdm import tqdm
22+
23+
from matbench_discovery import ROOT
24+
25+
# ISO-style (YYYY-MM-DD) date stamp of the current local day, used in the output file name
today = datetime.now().strftime("%Y-%m-%d")
26+
# directory part of this script's path; the featurized output is written into it
module_dir = os.path.split(__file__)[0]
27+
28+
29+
# %% Create the featurizer: Ward et al. use a variety of different featurizers
30+
# https://journals.aps.org/prb/abstract/10.1103/PhysRevB.96.024104
31+
# Five featurizers from matminer.featurizers.structure come first, followed by
# the four featurizers from matminer.featurizers.composition, each wrapped in
# StructureComposition. The order of the list is kept exactly as before.
featurizer = MultipleFeaturizer(
    [
        SiteStatsFingerprint.from_preset("CoordinationNumber_ward-prb-2017"),
        StructuralHeterogeneity(),
        ChemicalOrdering(),
        MaximumPackingEfficiency(),
        SiteStatsFingerprint.from_preset("LocalPropertyDifference_ward-prb-2017"),
        *(
            StructureComposition(composition_featurizer)
            for composition_featurizer in (
                Stoichiometry(),
                ElementProperty.from_preset("magpie"),
                ValenceOrbital(props=["frac"]),
                IonProperty(fast=True),
            )
        ),
    ],
)
44+
45+
46+
# %%
47+
# Input file to featurize; the commented-out line is the alternative WBM input.
data_path = f"{ROOT}/data/2022-09-16-all-mp-entries.json.gz"
# data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-cses+init-structs.json.bz2"

# Load the JSON records and key the rows by material ID.
df = pd.read_json(data_path)
df = df.set_index("material_id")

# Rebuild a pymatgen Structure from the "structure" dict stored in each entry.
df["structure"] = [
    Structure.from_dict(entry_dict["structure"]) for entry_dict in tqdm(df["entry"])
]
52+
53+
54+
# %%
55+
# Run every featurizer on the "structure" column.
# NOTE(review): with ignore_errors=True, rows whose featurization raises are
# presumably filled with NaN instead of aborting the run; confirm in matminer docs.
df_featurized = featurizer.featurize_dataframe(
    df,
    col_id="structure",
    ignore_errors=True,
)
56+
57+
58+
# %%
59+
# Write the featurized dataframe next to this script with a date-stamped name.
# The hyphen after the date was missing (giving e.g. "2022-10-25mp-train-...");
# it is added so the name follows the same "YYYY-MM-DD-<name>" pattern as the
# input data files read by this script.
# NOTE(review): df_featurized presumably still carries the "structure" column of
# pymatgen objects; confirm pandas serializes it as intended (or drop the column /
# pass a default_handler) before relying on this file.
out_path = f"{module_dir}/{today}-mp-train-voronoi-tesselation.json.gz"
df_featurized.to_json(out_path)

0 commit comments

Comments
 (0)