Skip to content

Commit f9c348c

Browse files
committed
automatically append '-debug' to job_name when debugging
1 parent a2a99f2 commit f9c348c

File tree

10 files changed

+51
-74
lines changed

10 files changed

+51
-74
lines changed

matbench_discovery/__init__.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
from __future__ import annotations
22

33
import os
4+
import sys
45
from collections.abc import Generator, Sequence
6+
from datetime import datetime
57
from typing import Any
68

7-
PKG_DIR = os.path.dirname(__file__)
8-
ROOT = os.path.dirname(PKG_DIR)
9+
ROOT = os.path.dirname(os.path.dirname(__file__))
10+
DEBUG = "slurm-submit" not in sys.argv and "SLURM_JOB_ID" not in os.environ
11+
12+
timestamp = f"{datetime.now():%Y-%m-%d@%H-%M-%S}"
13+
today = timestamp.split("@")[0]
914

1015

1116
def chunks(xs: Sequence[Any], n: int) -> Generator[Sequence[Any], None, None]:

models/bowsr/test_bowsr.py

+3-7
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
import contextlib
55
import os
6-
from datetime import datetime
76
from importlib.metadata import version
87
from typing import Any
98

@@ -14,7 +13,7 @@
1413
from maml.apps.bowsr.optimizer import BayesianOptimizer
1514
from tqdm import tqdm
1615

17-
from matbench_discovery import ROOT, as_dict_handler
16+
from matbench_discovery import DEBUG, ROOT, as_dict_handler, timestamp, today
1817
from matbench_discovery.slurm import slurm_submit
1918

2019
__author__ = "Janosh Riebesell"
@@ -36,10 +35,8 @@
3635
# see https://stackoverflow.com/a/55431306 for how to change array throttling
3736
# post submission
3837
slurm_max_parallel = 50
39-
timestamp = f"{datetime.now():%Y-%m-%d@%H-%M-%S}"
40-
today = timestamp.split("@")[0]
4138
energy_model = "megnet"
42-
job_name = f"bowsr-{energy_model}-wbm-{task_type}"
39+
job_name = f"bowsr-{energy_model}-wbm-{task_type}{'-debug' if DEBUG else ''}"
4340
out_dir = os.environ.get("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
4441

4542
data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-init-structs.json.bz2"
@@ -109,8 +106,7 @@
109106
if wandb.run is None:
110107
wandb.login()
111108

112-
run_name = f"{job_name}-{slurm_array_task_id}"
113-
wandb.init(project="matbench-discovery", name=run_name, config=run_params)
109+
wandb.init(project="matbench-discovery", name=job_name, config=run_params)
114110

115111

116112
# %%

models/cgcnn/test_cgcnn.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from __future__ import annotations
33

44
import os
5-
from datetime import datetime
5+
import sys
66
from importlib.metadata import version
77

88
import pandas as pd
@@ -14,7 +14,7 @@
1414
from torch.utils.data import DataLoader
1515
from tqdm import tqdm
1616

17-
from matbench_discovery import ROOT
17+
from matbench_discovery import DEBUG, ROOT, today
1818
from matbench_discovery.plot_scripts import df_wbm
1919
from matbench_discovery.plots import wandb_log_scatter
2020
from matbench_discovery.slurm import slurm_submit
@@ -28,9 +28,9 @@
2828
stores predictions to CSV.
2929
"""
3030

31-
today = f"{datetime.now():%Y-%m-%d}"
3231
task_type = "RS2RE"
33-
job_name = f"test-cgcnn-wbm-{task_type}"
32+
debug = "slurm-submit" in sys.argv
33+
job_name = f"test-cgcnn-wbm-{task_type}{'-debug' if DEBUG else ''}"
3434
module_dir = os.path.dirname(__file__)
3535
out_dir = os.environ.get("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
3636

@@ -93,9 +93,8 @@
9393
slurm_vars=slurm_vars | dict(slurm_max_job_time=slurm_max_job_time),
9494
)
9595

96-
slurm_job_id = os.environ.get("SLURM_JOB_ID", "debug")
97-
run_name = f"{job_name}-{slurm_job_id}"
98-
wandb.init(project="matbench-discovery", name=run_name, config=run_params)
96+
97+
wandb.init(project="matbench-discovery", name=job_name, config=run_params)
9998

10099
cg_data = CrystalGraphData(
101100
df, task_dict={target_col: "regression"}, structure_col=input_col

models/cgcnn/train_cgcnn.py

+6-9
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# %%
22
import os
3-
from datetime import datetime
43

54
import pandas as pd
65
from aviary.cgcnn.data import CrystalGraphData, collate_batch
@@ -11,11 +10,11 @@
1110
from torch.utils.data import DataLoader
1211
from tqdm import tqdm
1312

14-
from matbench_discovery import ROOT
13+
from matbench_discovery import DEBUG, ROOT, timestamp, today
1514
from matbench_discovery.slurm import slurm_submit
1615

1716
"""
18-
Train a CGCNN ensemble of size n_ens on target_col of data_path.
17+
Train a CGCNN ensemble on target_col of data_path.
1918
"""
2019

2120
__author__ = "Janosh Riebesell"
@@ -25,12 +24,10 @@
2524
# %%
2625
epochs = 300
2726
target_col = "formation_energy_per_atom"
28-
job_name = f"train-cgcnn-robust-{target_col}"
27+
job_name = f"train-cgcnn-robust-{target_col}{'-debug' if DEBUG else ''}"
2928
print(f"{job_name=}")
3029
robust = "robust" in job_name.lower()
31-
n_ens = 10
32-
timestamp = f"{datetime.now():%Y-%m-%d@%H-%M-%S}"
33-
today = timestamp.split("@")[0]
30+
ensemble_size = 10
3431
module_dir = os.path.dirname(__file__)
3532
out_dir = os.environ.get("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
3633

@@ -39,7 +36,7 @@
3936
partition="ampere",
4037
account="LEE-SL3-GPU",
4138
time="8:0:0",
42-
array=f"1-{n_ens}",
39+
array=f"1-{ensemble_size}",
4340
out_dir=out_dir,
4441
slurm_flags=("--nodes", "1", "--gpus-per-node", "1"),
4542
)
@@ -107,7 +104,7 @@
107104
model_params=model_params,
108105
model=model,
109106
optimizer=optimizer,
110-
run_name=job_name,
107+
run_name=f"{job_name}-{slurm_array_task_id}",
111108
swa_start=swa_start,
112109
target_col=target_col,
113110
task_type=task_type,

models/m3gnet/test_m3gnet.py

+4-11
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
import os
55
import warnings
6-
from datetime import datetime
76
from importlib.metadata import version
87
from typing import Any
98

@@ -13,7 +12,7 @@
1312
from m3gnet.models import Relaxer
1413
from tqdm import tqdm
1514

16-
from matbench_discovery import ROOT, as_dict_handler
15+
from matbench_discovery import DEBUG, ROOT, as_dict_handler, timestamp, today
1716
from matbench_discovery.slurm import slurm_submit
1817

1918
"""
@@ -26,13 +25,11 @@
2625
__date__ = "2022-08-15"
2726

2827
task_type = "IS2RE" # "RS2RE"
29-
timestamp = f"{datetime.now():%Y-%m-%d@%H-%M-%S}"
30-
today = timestamp.split("@")[0]
3128
module_dir = os.path.dirname(__file__)
3229
# set large job array size for fast testing/debugging
3330
slurm_array_task_count = 100
3431
slurm_mem_per_node = 12000
35-
job_name = f"m3gnet-wbm-{task_type}"
32+
job_name = f"m3gnet-wbm-{task_type}{'-debug' if DEBUG else ''}"
3633
out_dir = os.environ.get("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
3734

3835
slurm_vars = slurm_submit(
@@ -83,12 +80,8 @@
8380
if wandb.run is None:
8481
wandb.login()
8582

86-
slurm_job_id = os.environ.get("SLURM_JOB_ID", "debug")
87-
wandb.init(
88-
project="matbench-discovery",
89-
name=f"{job_name}-{slurm_job_id}-{slurm_array_task_id}",
90-
config=run_params,
91-
)
83+
run_name = f"{job_name}-{slurm_array_task_id}"
84+
wandb.init(project="matbench-discovery", name=run_name, config=run_params)
9285

9386

9487
# %%

models/megnet/test_megnet.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from __future__ import annotations
33

44
import os
5-
from datetime import datetime
65
from importlib.metadata import version
76

87
import pandas as pd
@@ -11,7 +10,7 @@
1110
from sklearn.metrics import r2_score
1211
from tqdm import tqdm
1312

14-
from matbench_discovery import ROOT
13+
from matbench_discovery import DEBUG, ROOT, timestamp, today
1514
from matbench_discovery.plot_scripts import df_wbm
1615
from matbench_discovery.plots import wandb_log_scatter
1716
from matbench_discovery.slurm import slurm_submit
@@ -26,10 +25,8 @@
2625
__date__ = "2022-11-14"
2726

2827
task_type = "IS2RE"
29-
timestamp = f"{datetime.now():%Y-%m-%d@%H-%M-%S}"
30-
today = timestamp.split("@")[0]
3128
module_dir = os.path.dirname(__file__)
32-
job_name = f"megnet-wbm-{task_type}"
29+
job_name = f"megnet-wbm-{task_type}{'-debug' if DEBUG else ''}"
3330
out_dir = os.environ.get("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
3431

3532
slurm_vars = slurm_submit(

models/voronoi/voronoi_featurize_dataset.py

+7-11
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,18 @@
11
# %%
22
import os
3+
import sys
34
import warnings
4-
from datetime import datetime
55

66
import numpy as np
77
import pandas as pd
88
import wandb
99
from pymatgen.core import Structure
1010
from tqdm import tqdm
1111

12-
from matbench_discovery import ROOT
12+
from matbench_discovery import DEBUG, ROOT, today
1313
from matbench_discovery.slurm import slurm_submit
1414
from models.voronoi import featurizer
1515

16-
today = f"{datetime.now():%Y-%m-%d}"
1716
module_dir = os.path.dirname(__file__)
1817

1918
data_name = "mp" # "mp"
@@ -25,7 +24,8 @@
2524
input_col = "structure"
2625

2726
slurm_array_task_count = 30
28-
job_name = f"voronoi-features-{data_name}"
27+
debug = "slurm-submit" in sys.argv
28+
job_name = f"voronoi-features-{data_name}{'-debug' if DEBUG else ''}"
2929
out_dir = os.environ.get("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
3030

3131

@@ -42,7 +42,8 @@
4242

4343
# %%
4444
slurm_array_task_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0))
45-
out_path = f"{out_dir}/{job_name}.csv.bz2"
45+
run_name = f"{job_name}-{slurm_array_task_id}"
46+
out_path = f"{out_dir}/{run_name}.csv.bz2"
4647

4748
if os.path.isfile(out_path):
4849
raise SystemExit(f"{out_path = } already exists, exciting early")
@@ -73,12 +74,7 @@
7374
if wandb.run is None:
7475
wandb.login()
7576

76-
slurm_job_id = os.environ.get("SLURM_JOB_ID", "debug")
77-
wandb.init(
78-
project="matbench-discovery",
79-
name=f"{job_name}-{slurm_job_id}-{slurm_array_task_id}",
80-
config=run_params,
81-
)
77+
wandb.init(project="matbench-discovery", name=run_name, config=run_params)
8278

8379

8480
# %% prints lots of pymatgen warnings

models/wrenformer/test_wrenformer.py

+5-8
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from __future__ import annotations
33

44
import os
5-
from datetime import datetime
5+
import sys
66
from importlib.metadata import version
77

88
import pandas as pd
@@ -11,7 +11,7 @@
1111
from aviary.wrenformer.data import df_to_in_mem_dataloader
1212
from aviary.wrenformer.model import Wrenformer
1313

14-
from matbench_discovery import ROOT
14+
from matbench_discovery import DEBUG, ROOT, today
1515
from matbench_discovery.plots import wandb_log_scatter
1616
from matbench_discovery.slurm import slurm_submit
1717

@@ -24,10 +24,10 @@
2424
stores predictions to CSV.
2525
"""
2626

27-
today = f"{datetime.now():%Y-%m-%d}"
2827
task_type = "IS2RE"
2928
data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-summary.csv"
30-
job_name = "test-wrenformer-wbm-IS2RE"
29+
debug = "slurm-submit" in sys.argv
30+
job_name = f"test-wrenformer-wbm-IS2RE{'-debug' if DEBUG else ''}"
3131
module_dir = os.path.dirname(__file__)
3232
out_dir = os.environ.get("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
3333

@@ -80,10 +80,7 @@
8080
slurm_vars=slurm_vars,
8181
)
8282

83-
slurm_job_id = os.environ.get("SLURM_JOB_ID", "debug")
84-
wandb.init(
85-
project="matbench-discovery", name=f"{job_name}-{slurm_job_id}", config=run_params
86-
)
83+
wandb.init(project="matbench-discovery", name=job_name, config=run_params)
8784

8885

8986
# %%

models/wrenformer/train_wrenformer.py

+7-11
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
11
# %%
22
import os
3-
from datetime import datetime
43
from importlib.metadata import version
54

65
import pandas as pd
76
from aviary.train import df_train_test_split, train_wrenformer
87

9-
from matbench_discovery import ROOT
8+
from matbench_discovery import DEBUG, ROOT, timestamp, today
109
from matbench_discovery.slurm import slurm_submit
1110

1211
"""
13-
Train a Wrenformer ensemble of size n_ens on target_col of data_path.
12+
Train a Wrenformer ensemble on target_col of data_path.
1413
"""
1514

1615
__author__ = "Janosh Riebesell"
@@ -25,10 +24,8 @@
2524
# data_path = f"{ROOT}/data/2022-08-25-m3gnet-trainset-mp-2021-struct-energy.json.gz"
2625
# target_col = "mp_energy_per_atom"
2726
data_name = "m3gnet-trainset" if "m3gnet" in data_path else "mp"
28-
job_name = f"train-wrenformer-robust-{data_name}"
29-
n_ens = 10
30-
timestamp = f"{datetime.now():%Y-%m-%d@%H-%M-%S}"
31-
today = timestamp.split("@")[0]
27+
job_name = f"train-wrenformer-robust-{data_name}{'-debug' if DEBUG else ''}"
28+
ensemble_size = 10
3229
dataset = "mp"
3330
module_dir = os.path.dirname(__file__)
3431
out_dir = os.environ.get("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
@@ -39,7 +36,7 @@
3936
partition="ampere",
4037
account="LEE-SL3-GPU",
4138
time="8:0:0",
42-
array=f"1-{n_ens}",
39+
array=f"1-{ensemble_size}",
4340
out_dir=out_dir,
4441
slurm_flags=("--nodes", "1", "--gpus-per-node", "1"),
4542
)
@@ -70,15 +67,14 @@
7067
slurm_vars=slurm_vars,
7168
)
7269

73-
slurm_job_id = os.environ.get("SLURM_JOB_ID", "debug")
7470
train_wrenformer(
75-
run_name=f"{job_name}-{slurm_job_id}-{slurm_array_task_id}",
71+
run_name=f"{job_name}-{slurm_array_task_id}",
7672
train_df=train_df,
7773
test_df=test_df,
7874
target_col=target_col,
7975
task_type="regression",
8076
timestamp=timestamp,
81-
# folds=(n_ens, slurm_array_task_id),
77+
# folds=(ensemble_size, slurm_array_task_id),
8278
epochs=epochs,
8379
checkpoint="wandb", # None | 'local' | 'wandb',
8480
input_col=input_col,

0 commit comments

Comments
 (0)