fix predict_from_wandb_checkpoints() not returning ensemble_metrics because target_col was not passed

janosh · janosh · commit 5804d13b4b6d · 2023-06-19T20:29:21.000-07:00
change slurm log file extension from .out to .log for better syntax highlighting
diff --git a/.gitignore b/.gitignore
@@ -18,7 +18,7 @@ wandb/
 job-logs/
 
 # slurm logs
-slurm-*out
+*slurm-*.log
 models/**/*.csv
 
 # temporary ignore rule
diff --git a/matbench_discovery/slurm.py b/matbench_discovery/slurm.py
@@ -77,7 +77,7 @@ def slurm_submit_python(
     cmd = [
         *f"sbatch --{partition=} --{account=} --{time=}".replace("'", "").split(),
         *("--job-name", job_name),
-        *("--output", f"{log_dir}/slurm-%A{'-%a' if array else ''}-{today}.out"),
+        *("--output", f"{log_dir}/{today}-slurm-%A{'-%a' if array else ''}.log"),
         *slurm_flags,
         *("--wrap", f"{pre_cmd} python {py_file_path}".strip()),
     ]
diff --git a/models/cgcnn/use_cgcnn_ensemble.py b/models/cgcnn/use_cgcnn_ensemble.py
@@ -60,8 +60,7 @@
 df[input_col] = [Structure.from_dict(x) for x in tqdm(df[input_col], disable=None)]
 
 wandb.login()
-wandb_api = wandb.Api()
-runs = wandb_api.runs(
+runs = wandb.Api().runs(
     "janosh/matbench-discovery", filters={"tags": {"$in": [ensemble_id]}}
 )
 
diff --git a/models/voronoi/featurize_mp_wbm.py b/models/voronoi/featurize_mp_wbm.py
@@ -43,7 +43,7 @@
     account="LEE-SL3-CPU",
     time=(slurm_max_job_time := "3:0:0"),
     array=f"1-{slurm_array_task_count}",
-    log_dir=module_dir,
+    log_dir=f"{module_dir}/{job_name}",
 )
 
 
@@ -68,6 +68,7 @@
 run_params = dict(
     data_path=data_path,
     slurm_max_job_time=slurm_max_job_time,
+    df=dict(shape=str(df_this_job.shape), columns=", ".join(df_this_job)),
     **slurm_vars,
 )
 if wandb.run is None:
diff --git a/models/wrenformer/mp/use_wrenformer_ensemble.py b/models/wrenformer/mp/use_wrenformer_ensemble.py
@@ -24,8 +24,9 @@
 
 module_dir = os.path.dirname(__file__)
 today = f"{datetime.now():%Y-%m-%d}"
-ensemble_id = "wrenformer-e_form-ensemble-1"
-run_name = f"{today}-{ensemble_id}-IS2RE"
+data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-summary.csv"
+assert "wbm" in data_path
+run_name = "wrenformer-wbm-IS2RE"
 
 slurm_submit_python(
     job_name=run_name,
@@ -38,7 +39,6 @@
 
 
 # %%
-data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-summary.csv"
 target_col = "e_form_per_atom_mp2020_corrected"
 input_col = "wyckoff_spglib"
 df = pd.read_csv(data_path).dropna(subset=input_col).set_index("material_id")
@@ -58,21 +58,18 @@
 
 # %%
 wandb.login()
-wandb_api = wandb.Api()
-runs = wandb_api.runs(
-    "janosh/matbench-discovery",
-    filters={
-        "$and": [{"created_at": {"$gt": "2022-11-10", "$lt": "2022-11-11"}}],
-        "display_name": "wrenformer-robust-mp-formation_energy_per_atom-epochs=300",
-    },
-)
+filters = {
+    "$and": [{"created_at": {"$gt": "2022-11-10", "$lt": "2022-11-11"}}],
+    "display_name": "wrenformer-robust-mp-formation_energy_per_atom-epochs=300",
+}
+runs = wandb.Api().runs("janosh/matbench-discovery", filters=filters)
 
-assert len(runs) == 10, f"Expected 10 runs, got {len(runs)} for {ensemble_id=}"
+assert len(runs) == 10, f"Expected 10 runs, got {len(runs)} for {filters=}"
 
 
 # %%
-df, ensemble_metrics = predict_from_wandb_checkpoints(
-    runs, data_loader=data_loader, df=df, model_cls=Wrenformer
+df, _ensemble_metrics = predict_from_wandb_checkpoints(
+    runs, data_loader=data_loader, df=df, model_cls=Wrenformer, target_col=target_col
 )
 
 df.round(6).to_csv(f"{module_dir}/{today}-{run_name}-preds.csv")
diff --git a/tests/test_slurm.py b/tests/test_slurm.py
@@ -48,7 +48,7 @@ def test_slurm_submit(capsys: CaptureFixture[str], py_file_path: str | None) ->
 
     sbatch_cmd = (
         f"sbatch --partition={partition} --account={account} --time={time} "
-        f"--job-name {job_name} --output {log_dir}/slurm-%A-{today}.out --test-flag "
+        f"--job-name {job_name} --output {log_dir}/{today}-slurm-%A.log --test-flag "
         f"--wrap python {py_file_path or __file__}"
     ).replace(" --", "\n  --")
     stdout, stderr = capsys.readouterr()

Original file line number	Diff line number	Diff line change
`@@ -77,7 +77,7 @@ def slurm_submit_python(`
`77`	`77`	`cmd = [`
`78`	`78`	`*f"sbatch --{partition=} --{account=} --{time=}".replace("'", "").split(),`
`79`	`79`	`*("--job-name", job_name),`
`80`		`- *("--output", f"{log_dir}/slurm-%A{'-%a' if array else ''}-{today}.out"),`
	`80`	`+ *("--output", f"{log_dir}/{today}-slurm-%A{'-%a' if array else ''}.log"),`
`81`	`81`	`*slurm_flags,`
`82`	`82`	`*("--wrap", f"{pre_cmd} python {py_file_path}".strip()),`
`83`	`83`	`]`
Original file line number	Diff line number	Diff line change
`@@ -60,8 +60,7 @@`
`60`	`60`	`df[input_col] = [Structure.from_dict(x) for x in tqdm(df[input_col], disable=None)]`
`61`	`61`
`62`	`62`	`wandb.login()`
`63`		`-wandb_api = wandb.Api()`
`64`		`-runs = wandb_api.runs(`
	`63`	`+runs = wandb.Api().runs(`
`65`	`64`	`"janosh/matbench-discovery", filters={"tags": {"$in": [ensemble_id]}}`
`66`	`65`	`)`
`67`	`66`
Original file line number	Diff line number	Diff line change
`@@ -43,7 +43,7 @@`
`43`	`43`	`account="LEE-SL3-CPU",`
`44`	`44`	`time=(slurm_max_job_time := "3:0:0"),`
`45`	`45`	`array=f"1-{slurm_array_task_count}",`
`46`		`- log_dir=module_dir,`
	`46`	`+ log_dir=f"{module_dir}/{job_name}",`
`47`	`47`	`)`
`48`	`48`
`49`	`49`
`@@ -68,6 +68,7 @@`
`68`	`68`	`run_params = dict(`
`69`	`69`	`data_path=data_path,`
`70`	`70`	`slurm_max_job_time=slurm_max_job_time,`
	`71`	`+ df=dict(shape=str(df_this_job.shape), columns=", ".join(df_this_job)),`
`71`	`72`	`**slurm_vars,`
`72`	`73`	`)`
`73`	`74`	`if wandb.run is None:`