Skip to content

Commit 65d1b54

Browse files
committed
add models/voronoi/train_voronoi_rf.py
move featurizer def from models/voronoi/featurize_mp_wbm.py to new models/voronoi/__init__.py
1 parent 612d308 commit 65d1b54

File tree

5 files changed

+44
-37
lines changed

5 files changed

+44
-37
lines changed

models/cgcnn/train_cgcnn.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
from matbench_discovery.slurm import slurm_submit
1616

1717
"""
18-
Train a Wrenformer ensemble of size n_folds on target_col of data_path.
18+
Train a CGCNN ensemble of size n_folds on target_col of data_path.
1919
"""
2020

2121
__author__ = "Janosh Riebesell"
@@ -25,7 +25,7 @@
2525
# %%
2626
epochs = 300
2727
target_col = "formation_energy_per_atom"
28-
run_name = f"cgcnn-robust-{target_col}-{epochs=}"
28+
run_name = f"cgcnn-robust-{target_col}"
2929
print(f"{run_name=}")
3030
robust = "robust" in run_name.lower()
3131
n_folds = 10

models/m3gnet/test_m3gnet.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,7 @@
3232
# set large job array size for fast testing/debugging
3333
slurm_array_task_count = 100
3434
slurm_mem_per_node = 12000
35-
slurm_job_id = os.environ.get("SLURM_JOB_ID", "debug")
36-
job_name = f"m3gnet-wbm-{task_type}-{slurm_job_id}"
35+
job_name = f"m3gnet-wbm-{task_type}"
3736
out_dir = f"{module_dir}/{today}-{job_name}"
3837

3938
slurm_vars = slurm_submit(
@@ -85,9 +84,10 @@
8584
if wandb.run is None:
8685
wandb.login()
8786

87+
slurm_job_id = os.environ.get("SLURM_JOB_ID", "debug")
8888
wandb.init(
8989
project="matbench-discovery",
90-
name=f"{job_name}-{slurm_array_task_id}",
90+
name=f"{job_name}-{slurm_job_id}-{slurm_array_task_id}",
9191
config=run_params,
9292
)
9393

models/voronoi/__init__.py

+24
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,24 @@
1+
import matminer.featurizers.composition as feat_comp
2+
import matminer.featurizers.structure as feat_struct
3+
from matminer.featurizers.base import MultipleFeaturizer
4+
5+
# Create the featurizer: Ward et al. use a variety of different featurizers
6+
# https://journals.aps.org/prb/abstract/10.1103/PhysRevB.96.024104
7+
featurizers = [
8+
feat_struct.SiteStatsFingerprint.from_preset("CoordinationNumber_ward-prb-2017"),
9+
feat_struct.StructuralHeterogeneity(),
10+
feat_struct.ChemicalOrdering(),
11+
feat_struct.MaximumPackingEfficiency(),
12+
feat_struct.SiteStatsFingerprint.from_preset(
13+
"LocalPropertyDifference_ward-prb-2017"
14+
),
15+
feat_struct.StructureComposition(feat_comp.Stoichiometry()),
16+
feat_struct.StructureComposition(feat_comp.ElementProperty.from_preset("magpie")),
17+
feat_struct.StructureComposition(feat_comp.ValenceOrbital(props=["frac"])),
18+
feat_struct.StructureComposition(feat_comp.IonProperty(fast=True)),
19+
]
20+
featurizer = MultipleFeaturizer(featurizers)
21+
22+
# multiprocessing seems to be the cause of OOM errors on large structures even when
23+
# taking only a small slice of the data and launching slurm jobs with --mem 100G
24+
featurizer.set_n_jobs(1)

models/voronoi/featurize_mp_wbm.py → models/voronoi/voronoi_featurize_dataset.py

+12-30
Original file line number | Diff line number | Diff line change
@@ -3,26 +3,28 @@
33
import warnings
44
from datetime import datetime
55

6-
import matminer.featurizers.composition as feat_comp
7-
import matminer.featurizers.structure as feat_struct
86
import numpy as np
97
import pandas as pd
108
import wandb
11-
from matminer.featurizers.base import MultipleFeaturizer
129
from pymatgen.core import Structure
1310
from tqdm import tqdm
1411

1512
from matbench_discovery import ROOT, as_dict_handler
1613
from matbench_discovery.slurm import slurm_submit
14+
from models.voronoi import featurizer
1715

1816
today = f"{datetime.now():%Y-%m-%d}"
1917
module_dir = os.path.dirname(__file__)
2018

2119

22-
data_path = f"{ROOT}/data/mp/2022-09-16-mp-computed-structure-entries.json.gz"
23-
# data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-init-structs.json.bz2"
24-
input_col = "initial_structure"
25-
data_name = "wbm" if "wbm" in data_path else "mp"
20+
data_name = "mp" # "mp" or "wbm"
21+
if data_name == "wbm":
22+
data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-init-structs.json.bz2"
23+
input_col = "initial_structure"
24+
elif data_name == "mp":
25+
data_path = f"{ROOT}/data/mp/2022-09-16-mp-computed-structure-entries.json.gz"
26+
input_col = "structure"
27+
2628
slurm_array_task_count = 10
2729
job_name = f"voronoi-features-{data_name}"
2830
log_dir = f"{module_dir}/{today}-{job_name}"
@@ -39,7 +41,8 @@
3941

4042
# %%
4143
slurm_array_task_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0))
42-
run_name = f"{job_name}-{slurm_array_task_id}"
44+
slurm_job_id = os.environ.get("SLURM_JOB_ID", "debug")
45+
run_name = f"{job_name}-{slurm_job_id}-{slurm_array_task_id}"
4346
out_path = f"{log_dir}/{run_name}.csv.bz2"
4447

4548
if os.path.isfile(out_path):
@@ -57,7 +60,7 @@
5760
struct_dicts = df_this_job.initial_structure
5861

5962
df_this_job[input_col] = [
60-
Structure.from_dict(x) for x in tqdm(df_this_job.initial_structure, disable=None)
63+
Structure.from_dict(x) for x in tqdm(struct_dicts, disable=None)
6164
]
6265

6366

@@ -79,27 +82,6 @@
7982
)
8083

8184

82-
# %% Create the featurizer: Ward et al. use a variety of different featurizers
83-
# https://journals.aps.org/prb/abstract/10.1103/PhysRevB.96.024104
84-
featurizers = [
85-
feat_struct.SiteStatsFingerprint.from_preset("CoordinationNumber_ward-prb-2017"),
86-
feat_struct.StructuralHeterogeneity(),
87-
feat_struct.ChemicalOrdering(),
88-
feat_struct.MaximumPackingEfficiency(),
89-
feat_struct.SiteStatsFingerprint.from_preset(
90-
"LocalPropertyDifference_ward-prb-2017"
91-
),
92-
feat_struct.StructureComposition(feat_comp.Stoichiometry()),
93-
feat_struct.StructureComposition(feat_comp.ElementProperty.from_preset("magpie")),
94-
feat_struct.StructureComposition(feat_comp.ValenceOrbital(props=["frac"])),
95-
feat_struct.StructureComposition(feat_comp.IonProperty(fast=True)),
96-
]
97-
featurizer = MultipleFeaturizer(featurizers)
98-
# multiprocessing seems to be the cause of OOM errors on large structures even when
99-
# taking only a small slice of the data and launching slurm jobs with --mem 100G
100-
featurizer.set_n_jobs(1)
101-
102-
10385
# %% prints lots of pymatgen warnings
10486
# > No electronegativity for Ne. Setting to NaN. This has no physical meaning, ...
10587
warnings.filterwarnings(action="ignore", category=UserWarning, module="pymatgen")

models/wrenformer/train_wrenformer.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
# data_path = f"{ROOT}/data/2022-08-25-m3gnet-trainset-mp-2021-struct-energy.json.gz"
2525
# target_col = "mp_energy_per_atom"
2626
data_name = "m3gnet-trainset" if "m3gnet" in data_path else "mp"
27-
run_name = f"train-wrenformer-robust-{data_name}-{target_col}-{epochs=}"
27+
run_name = f"train-wrenformer-robust-{data_name}-{target_col}"
2828
n_folds = 10
2929
timestamp = f"{datetime.now():%Y-%m-%d@%H-%M-%S}"
3030
today = timestamp.split("@")[0]
@@ -65,8 +65,9 @@
6565
test_df=dict(shape=test_df.shape, columns=", ".join(test_df)),
6666
)
6767

68+
slurm_job_id = os.environ.get("SLURM_JOB_ID", "debug")
6869
train_wrenformer(
69-
run_name=run_name,
70+
run_name=f"{run_name}-{slurm_job_id}",
7071
train_df=train_df,
7172
test_df=test_df,
7273
target_col=target_col,

0 commit comments

Comments
 (0)