Skip to content

Commit 38a9d7a

Browse files
committed
fix recursion error when saving voronoi-tesselation to JSON
- refactor matminer featurizer imports
- slurm_submit_python() returns more env vars
- rename slurm_submit_python() to slurm_submit()
1 parent 5804d13 commit 38a9d7a

9 files changed

+48
-55
lines changed

matbench_discovery/slurm.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
from datetime import datetime
66

77
SLURM_KEYS = (
8-
"job_id array_task_id array_task_count mem_per_node nodelist submit_host".split()
9-
)
8+
"job_id array_task_id array_task_count mem_per_node nodelist submit_host "
9+
"job_partition job_user job_account tasks_per_node job_qos"
10+
).split()
1011

1112

1213
def _get_calling_file_path(frame: int = 1) -> str:
@@ -22,7 +23,7 @@ def _get_calling_file_path(frame: int = 1) -> str:
2223
return os.path.abspath(caller_path)
2324

2425

25-
def slurm_submit_python(
26+
def slurm_submit(
2627
job_name: str,
2728
log_dir: str,
2829
time: str,
@@ -45,7 +46,7 @@ def slurm_submit_python(
4546
ID and array task ID.
4647
time (str): 'HH:MM:SS' time limit for the job.
4748
py_file_path (str, optional): Path to the python script to be submitted.
48-
Defaults to the path of the file calling slurm_submit_python().
49+
Defaults to the path of the file calling slurm_submit().
4950
partition (str, optional): Slurm partition.
5051
account (str, optional): Account to charge for this job.
5152
slurm_flags (Sequence[str], optional): Extra slurm CLI flags. Defaults to ().

models/bowsr/slurm_array_bowsr_wbm.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from tqdm import tqdm
1616

1717
from matbench_discovery import ROOT, as_dict_handler
18-
from matbench_discovery.slurm import slurm_submit_python
18+
from matbench_discovery.slurm import slurm_submit
1919

2020
__author__ = "Janosh Riebesell"
2121
__date__ = "2022-08-15"
@@ -41,7 +41,7 @@
4141

4242
data_path = f"{ROOT}/data/2022-10-19-wbm-init-structs.json.gz"
4343

44-
slurm_vars = slurm_submit_python(
44+
slurm_vars = slurm_submit(
4545
job_name=job_name,
4646
log_dir=out_dir,
4747
partition="icelake-himem",
@@ -97,9 +97,8 @@
9797
megnet_version=version("megnet"),
9898
optimize_kwargs=optimize_kwargs,
9999
task_type=task_type,
100-
slurm_array_task_count=slurm_array_task_count,
101100
slurm_max_job_time=slurm_max_job_time,
102-
**slurm_vars,
101+
slurm_vars=slurm_vars,
103102
)
104103
if wandb.run is None:
105104
wandb.login()

models/cgcnn/slurm_train_cgcnn_ensemble.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from torch.utils.data import DataLoader
1313
from tqdm import tqdm
1414

15-
from matbench_discovery.slurm import slurm_submit_python
15+
from matbench_discovery.slurm import slurm_submit
1616

1717
"""
1818
Train a CGCNN ensemble of size n_folds on target_col of data_path.
@@ -33,7 +33,7 @@
3333
today = timestamp.split("@")[0]
3434
log_dir = f"{os.path.dirname(__file__)}/{today}-{run_name}"
3535

36-
slurm_submit_python(
36+
slurm_submit(
3737
job_name=run_name,
3838
partition="ampere",
3939
account="LEE-SL3-GPU",

models/cgcnn/use_cgcnn_ensemble.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
from matbench_discovery import ROOT
1717
from matbench_discovery.plot_scripts import df_wbm
18-
from matbench_discovery.slurm import slurm_submit_python
18+
from matbench_discovery.slurm import slurm_submit
1919

2020
__author__ = "Janosh Riebesell"
2121
__date__ = "2022-08-15"
@@ -31,7 +31,7 @@
3131
ensemble_id = "cgcnn-e_form-ensemble-1"
3232
run_name = f"{today}-{ensemble_id}-IS2RE"
3333

34-
slurm_submit_python(
34+
slurm_submit(
3535
job_name=run_name,
3636
partition="ampere",
3737
account="LEE-SL3-GPU",

models/m3gnet/slurm_array_m3gnet_wbm.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from tqdm import tqdm
1515

1616
from matbench_discovery import ROOT, as_dict_handler
17-
from matbench_discovery.slurm import slurm_submit_python
17+
from matbench_discovery.slurm import slurm_submit
1818

1919
"""
2020
To slurm submit this file: python path/to/file.py slurm-submit
@@ -36,7 +36,7 @@
3636
job_name = f"m3gnet-wbm-{task_type}-{slurm_job_id}"
3737
out_dir = f"{module_dir}/{today}-{job_name}"
3838

39-
slurm_vars = slurm_submit_python(
39+
slurm_vars = slurm_submit(
4040
job_name=job_name,
4141
log_dir=out_dir,
4242
partition="icelake-himem",
@@ -77,11 +77,10 @@
7777
run_params = dict(
7878
data_path=data_path,
7979
m3gnet_version=version("m3gnet"),
80-
slurm_array_task_count=slurm_array_task_count,
8180
task_type=task_type,
8281
slurm_max_job_time=slurm_max_job_time,
8382
df=dict(shape=str(df_this_job.shape), columns=", ".join(df_this_job)),
84-
**slurm_vars,
83+
slurm_vars=slurm_vars,
8584
)
8685
if wandb.run is None:
8786
wandb.login()

models/voronoi/featurize_mp_wbm.py

+25-31
Original file line numberDiff line numberDiff line change
@@ -3,45 +3,34 @@
33
import warnings
44
from datetime import datetime
55

6+
import matminer.featurizers.composition as feat_comp
7+
import matminer.featurizers.structure as feat_struct
68
import numpy as np
79
import pandas as pd
810
import wandb
911
from matminer.featurizers.base import MultipleFeaturizer
10-
from matminer.featurizers.composition import (
11-
ElementProperty,
12-
IonProperty,
13-
Stoichiometry,
14-
ValenceOrbital,
15-
)
16-
from matminer.featurizers.structure import (
17-
ChemicalOrdering,
18-
MaximumPackingEfficiency,
19-
SiteStatsFingerprint,
20-
StructuralHeterogeneity,
21-
StructureComposition,
22-
)
2312
from pymatgen.core import Structure
2413
from tqdm import tqdm
2514

26-
from matbench_discovery import ROOT
27-
from matbench_discovery.slurm import slurm_submit_python
15+
from matbench_discovery import ROOT, as_dict_handler
16+
from matbench_discovery.slurm import slurm_submit
2817

2918
today = f"{datetime.now():%Y-%m-%d}"
3019
module_dir = os.path.dirname(__file__)
3120

3221

3322
# data_path = f"{ROOT}/data/mp/2022-09-16-mp-computed-structure-entries.json.gz"
3423
data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-init-structs.json.bz2"
35-
input_col = "structure"
24+
input_col = "initial_structure"
3625
data_name = "wbm" if "wbm" in data_path else "mp"
37-
slurm_array_task_count = 100
26+
slurm_array_task_count = 20
3827
job_name = f"voronoi-featurize-{data_name}"
3928

40-
slurm_vars = slurm_submit_python(
29+
slurm_vars = slurm_submit(
4130
job_name=job_name,
4231
partition="icelake-himem",
4332
account="LEE-SL3-CPU",
44-
time=(slurm_max_job_time := "3:0:0"),
33+
time=(slurm_max_job_time := "5:0:0"),
4534
array=f"1-{slurm_array_task_count}",
4635
log_dir=f"{module_dir}/{job_name}",
4736
)
@@ -51,6 +40,8 @@
5140
df = pd.read_json(data_path).set_index("material_id")
5241

5342
slurm_array_task_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0))
43+
run_name = f"{job_name}-{slurm_array_task_id}"
44+
5445
df_this_job: pd.DataFrame = np.array_split(df, slurm_array_task_count)[
5546
slurm_array_task_id - 1
5647
]
@@ -69,30 +60,33 @@
6960
data_path=data_path,
7061
slurm_max_job_time=slurm_max_job_time,
7162
df=dict(shape=str(df_this_job.shape), columns=", ".join(df_this_job)),
72-
**slurm_vars,
63+
input_col=input_col,
64+
slurm_vars=slurm_vars,
7365
)
7466
if wandb.run is None:
7567
wandb.login()
7668

7769
wandb.init(
7870
project="matbench-discovery",
79-
name=f"{job_name}-{slurm_array_task_id}",
71+
name=run_name,
8072
config=run_params,
8173
)
8274

8375

8476
# %% Create the featurizer: Ward et al. use a variety of different featurizers
8577
# https://journals.aps.org/prb/abstract/10.1103/PhysRevB.96.024104
8678
featurizers = [
87-
SiteStatsFingerprint.from_preset("CoordinationNumber_ward-prb-2017"),
88-
StructuralHeterogeneity(),
89-
ChemicalOrdering(),
90-
MaximumPackingEfficiency(),
91-
SiteStatsFingerprint.from_preset("LocalPropertyDifference_ward-prb-2017"),
92-
StructureComposition(Stoichiometry()),
93-
StructureComposition(ElementProperty.from_preset("magpie")),
94-
StructureComposition(ValenceOrbital(props=["frac"])),
95-
StructureComposition(IonProperty(fast=True)),
79+
feat_struct.SiteStatsFingerprint.from_preset("CoordinationNumber_ward-prb-2017"),
80+
feat_struct.StructuralHeterogeneity(),
81+
feat_struct.ChemicalOrdering(),
82+
feat_struct.MaximumPackingEfficiency(),
83+
feat_struct.SiteStatsFingerprint.from_preset(
84+
"LocalPropertyDifference_ward-prb-2017"
85+
),
86+
feat_struct.StructureComposition(feat_comp.Stoichiometry()),
87+
feat_struct.StructureComposition(feat_comp.ElementProperty.from_preset("magpie")),
88+
feat_struct.StructureComposition(feat_comp.ValenceOrbital(props=["frac"])),
89+
feat_struct.StructureComposition(feat_comp.IonProperty(fast=True)),
9690
]
9791
featurizer = MultipleFeaturizer(featurizers)
9892

@@ -108,5 +102,5 @@
108102

109103
# %%
110104
df_features.to_json(
111-
f"{module_dir}/{today}-voronoi-tesselation-{data_name}-features.json.gz"
105+
f"{module_dir}/{today}-{run_name}.json.gz", default_handler=as_dict_handler
112106
)

models/wrenformer/mp/use_wrenformer_ensemble.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from aviary.wrenformer.model import Wrenformer
1212

1313
from matbench_discovery import ROOT
14-
from matbench_discovery.slurm import slurm_submit_python
14+
from matbench_discovery.slurm import slurm_submit
1515

1616
__author__ = "Janosh Riebesell"
1717
__date__ = "2022-08-15"
@@ -28,7 +28,7 @@
2828
assert "wbm" in data_path
2929
run_name = "wrenformer-wbm-IS2RE"
3030

31-
slurm_submit_python(
31+
slurm_submit(
3232
job_name=run_name,
3333
partition="ampere",
3434
account="LEE-SL3-GPU",

models/wrenformer/slurm_train_wrenformer_ensemble.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from aviary.train import df_train_test_split, train_wrenformer
77

88
from matbench_discovery import ROOT
9-
from matbench_discovery.slurm import slurm_submit_python
9+
from matbench_discovery.slurm import slurm_submit
1010

1111
"""
1212
Train a Wrenformer ensemble of size n_folds on target_col of data_path.
@@ -31,7 +31,7 @@
3131
dataset = "mp"
3232
log_dir = f"{os.path.dirname(__file__)}/{dataset}/{today}-{run_name}"
3333

34-
slurm_submit_python(
34+
slurm_submit(
3535
job_name=run_name,
3636
partition="ampere",
3737
account="LEE-SL3-GPU",

tests/test_slurm.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import pytest
88
from pytest import CaptureFixture
99

10-
from matbench_discovery.slurm import _get_calling_file_path, slurm_submit_python
10+
from matbench_discovery.slurm import _get_calling_file_path, slurm_submit
1111

1212
today = f"{datetime.now():%Y-%m-%d}"
1313

@@ -21,7 +21,7 @@ def test_slurm_submit(capsys: CaptureFixture[str], py_file_path: str | None) ->
2121
partition = "fake-partition"
2222
account = "fake-account"
2323

24-
func_call = lambda: slurm_submit_python(
24+
func_call = lambda: slurm_submit(
2525
job_name=job_name,
2626
log_dir=log_dir,
2727
time=time,
@@ -35,10 +35,10 @@ def test_slurm_submit(capsys: CaptureFixture[str], py_file_path: str | None) ->
3535

3636
assert slurm_vars == {"slurm_job_id": "1234"}
3737
stdout, stderr = capsys.readouterr()
38-
# check slurm_submit_python() did nothing in normal mode
38+
# check slurm_submit() did nothing in normal mode
3939
assert stdout == stderr == ""
4040

41-
# check slurm_submit_python() prints cmd and calls subprocess.run() in submit mode
41+
# check slurm_submit() prints cmd and calls subprocess.run() in submit mode
4242
with pytest.raises(SystemExit), patch("sys.argv", ["slurm-submit"]), patch(
4343
"matbench_discovery.slurm.subprocess.run"
4444
) as mock_subprocess_run:

0 commit comments

Comments
 (0)