add wandb scatter plot logging to slurm_array_megnet_wbm.py

janosh · janosh · commit fed968ff2768 · 2023-06-19T20:29:21.000-07:00
add maml, megnet and m3gnet-dgl to the running-models group of extras_require in setup.py
call density_scatter() at the end of use_cgcnn_ensemble.py to plot predicted vs. target e_form_per_atom_mp2020_corrected
diff --git a/models/cgcnn/use_cgcnn_ensemble.py b/models/cgcnn/use_cgcnn_ensemble.py
@@ -10,6 +10,7 @@
 from aviary.cgcnn.model import CrystalGraphConvNet
 from aviary.deploy import predict_from_wandb_checkpoints
 from pymatgen.core import Structure
+from pymatviz import density_scatter
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 
@@ -29,7 +30,7 @@
 module_dir = os.path.dirname(__file__)
 today = f"{datetime.now():%Y-%m-%d}"
 ensemble_id = "cgcnn-e_form-ensemble-1"
-run_name = f"{today}-{ensemble_id}-IS2RE"
+run_name = f"{ensemble_id}-IS2RE"
 
 slurm_submit(
     job_name=run_name,
@@ -82,3 +83,13 @@
 )
 
 df.round(6).to_csv(f"{module_dir}/{today}-{run_name}-preds.csv", index=False)
+
+
+# %%
+print(f"{runs[0].url=}")
+ax = density_scatter(
+    df=df.query("e_form_per_atom_mp2020_corrected < 10"),
+    x="e_form_per_atom_mp2020_corrected",
+    y="e_form_per_atom_mp2020_corrected_pred_1",
+)
+# ax.figure.savefig(f"{ROOT}/tmp/{today}-{run_name}-scatter-preds.png", dpi=300)
diff --git a/models/m3gnet/slurm_array_m3gnet_wbm.py b/models/m3gnet/slurm_array_m3gnet_wbm.py
@@ -56,10 +56,10 @@
 print(f"Job started running {timestamp}")
 print(f"{version('m3gnet') = }")
 
-json_out_path = f"{out_dir}/{slurm_array_task_id}.json.gz"
+out_path = f"{out_dir}/{slurm_array_task_id}.json.gz"
 
-if os.path.isfile(json_out_path):
-    raise SystemExit(f"{json_out_path = } already exists, exciting early")
+if os.path.isfile(out_path):
+    raise SystemExit(f"{out_path = } already exists, exciting early")
 
 warnings.filterwarnings(action="ignore", category=UserWarning, module="pymatgen")
 warnings.filterwarnings(action="ignore", category=UserWarning, module="tensorflow")
@@ -125,6 +125,6 @@
 df_output = pd.DataFrame(relax_results).T
 df_output.index.name = "material_id"
 
-df_output.reset_index().to_json(json_out_path, default_handler=as_dict_handler)
+df_output.reset_index().to_json(out_path, default_handler=as_dict_handler)
 
-wandb.log_artifact(json_out_path, type=f"m3gnet-wbm-{task_type}")
+wandb.log_artifact(out_path, type=f"m3gnet-wbm-{task_type}")
diff --git a/models/megnet/slurm_array_megnet_wbm.py b/models/megnet/slurm_array_megnet_wbm.py
@@ -5,13 +5,13 @@
 from datetime import datetime
 from importlib.metadata import version
 
-import numpy as np
 import pandas as pd
 import wandb
 from megnet.utils.models import load_model
 from tqdm import tqdm
 
 from matbench_discovery import ROOT
+from matbench_discovery.plot_scripts import df_wbm
 from matbench_discovery.slurm import slurm_submit
 
 """
@@ -23,14 +23,11 @@
 __author__ = "Janosh Riebesell"
 __date__ = "2022-11-14"
 
-task_type = "IS2RE"  # "RS2RE"
+task_type = "IS2RE"
 timestamp = f"{datetime.now():%Y-%m-%d@%H-%M-%S}"
 today = timestamp.split("@")[0]
 module_dir = os.path.dirname(__file__)
-# set large job array size for fast testing/debugging
-slurm_array_task_count = 1
-slurm_job_id = os.environ.get("SLURM_JOB_ID", "debug")
-job_name = f"megnet-wbm-{task_type}-{slurm_job_id}"
+job_name = f"megnet-wbm-{task_type}"
 out_dir = f"{module_dir}/{today}-{job_name}"
 
 slurm_vars = slurm_submit(
@@ -39,80 +36,93 @@
     partition="icelake-himem",
     account="LEE-SL3-CPU",
     time=(slurm_max_job_time := "12:0:0"),
-    array=f"1-{slurm_array_task_count}",
     # TF_CPP_MIN_LOG_LEVEL=2 means INFO and WARNING logs are not printed
     # https://stackoverflow.com/a/40982782
     pre_cmd="TF_CPP_MIN_LOG_LEVEL=2",
 )
 
 
 # %%
-slurm_array_task_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0))
-
 print(f"Job started running {timestamp}")
 
-json_out_path = f"{out_dir}/{slurm_array_task_id}.json.gz"
-if os.path.isfile(json_out_path):
-    raise SystemExit(f"{json_out_path = } already exists, exciting early")
+out_path = f"{out_dir}/megnet-e-form-preds.csv"
+if os.path.isfile(out_path):
+    raise SystemExit(f"{out_path = } already exists, exciting early")
 
 
 # %%
 data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-init-structs.json.bz2"
 print(f"Loading from {data_path=}")
-df_wbm = pd.read_json(data_path).set_index("material_id")
+df_wbm_structs = pd.read_json(data_path).set_index("material_id")
 
-df_this_job: pd.DataFrame = np.array_split(df_wbm, slurm_array_task_count)[
-    slurm_array_task_id - 1
-]
 
 megnet_mp_e_form = load_model(model_name := "Eform_MP_2019")
 
+
+# %%
 run_params = dict(
     data_path=data_path,
     megnet_version=version("megnet"),
     model_name=model_name,
     task_type=task_type,
     slurm_max_job_time=slurm_max_job_time,
-    df=dict(shape=str(df_this_job.shape), columns=", ".join(df_this_job)),
+    df=dict(shape=str(df_wbm_structs.shape), columns=", ".join(df_wbm_structs)),
     slurm_vars=slurm_vars,
 )
 if wandb.run is None:
     wandb.login()
 
-wandb.init(
-    project="matbench-discovery",
-    name=f"{job_name}-{slurm_array_task_id}",
-    config=run_params,
-)
+wandb.init(project="matbench-discovery", name=job_name, config=run_params)
 
 
 # %%
 if task_type == "IS2RE":
     from pymatgen.core import Structure
 
-    structures = df_this_job.initial_structure.map(Structure.from_dict)
+    structures = df_wbm_structs.initial_structure.map(Structure.from_dict)
 elif task_type == "RS2RE":
     from pymatgen.entries.computed_entries import ComputedStructureEntry
 
-    df_this_job.cse = df_this_job.cse.map(ComputedStructureEntry.from_dict)
-    structures = df_this_job.cse.map(lambda x: x.structure)
+    df_wbm_structs.cse = df_wbm_structs.cse.map(ComputedStructureEntry.from_dict)
+    structures = df_wbm_structs.cse.map(lambda x: x.structure)
 else:
     raise ValueError(f"Unknown {task_type = }")
 
-megnet_preds = {}
-for material_id, structure in tqdm(structures.items(), disable=None):
-    if material_id in megnet_preds:
+megnet_e_form_preds = {}
+for material_id, structure in tqdm(structures.items(), total=len(structures)):
+    if material_id in megnet_e_form_preds:
         continue
-    e_form_per_atom = megnet_mp_e_form.predict_structure(structure)[0]
-    megnet_preds[material_id] = e_form_per_atom
+    try:
+        e_form_per_atom = megnet_mp_e_form.predict_structure(structure)[0]
+        megnet_e_form_preds[material_id] = e_form_per_atom
+    except Exception as exc:
+        print(f"Failed to predict {material_id=}: {exc}")
+
 
+# %%
+print(f"{len(megnet_e_form_preds)=:,}")
+print(f"{len(structures)=:,}")
+print(f"missing: {len(structures) - len(megnet_e_form_preds):,}")
+out_col = "e_form_per_atom_megnet"
+df_wbm[out_col] = pd.Series(megnet_e_form_preds)
 
-assert len(megnet_preds) == len(structures) == len(df_this_job)
-out_col = "megnet_e_form"
-df_this_job[out_col] = pd.Series(megnet_preds)
+df_wbm[out_col].reset_index().to_csv(out_path)
 
 
 # %%
-df_this_job[out_col].reset_index().to_json(json_out_path)
+fields = {"x": "e_form_per_atom_mp2020_corrected", "y": out_col}
+cols = list(fields.values())
+assert all(col in df_wbm for col in cols)
+
+table = wandb.Table(dataframe=df_wbm[cols].reset_index())
+
+MAE = (df_wbm[fields["x"]] - df_wbm[fields["y"]]).abs().mean()
+
+scatter_plot = wandb.plot_table(
+    vega_spec_name="janosh/scatter-parity",
+    data_table=table,
+    fields=fields,
+    string_fields={"title": f"{model_name} {task_type} {MAE=:.4}"},
+)
 
-wandb.log_artifact(json_out_path, type=f"m3gnet-wbm-{task_type}")
+wandb.log({"true_pred_scatter": scatter_plot})
diff --git a/models/voronoi/featurize_mp_wbm.py b/models/voronoi/featurize_mp_wbm.py
@@ -56,6 +56,7 @@
 ]
 
 
+# %%
 run_params = dict(
     data_path=data_path,
     slurm_max_job_time=slurm_max_job_time,

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -56,6 +56,7 @@` |
| 56 | 56 | `]` |
| 57 | 57 |  |
| 58 | 58 |  |
|  | 59 | `+# %%` |
| 59 | 60 | `run_params = dict(` |
| 60 | 61 | `data_path=data_path,` |
| 61 | 62 | `slurm_max_job_time=slurm_max_job_time,` |