Skip to content

Commit b3c3aba

Browse files
committed
add models/voronoi/join_voronoi_features.py
also add models/voronoi/readme.md to document OOM errors and set_n_jobs(1) solution
1 parent 8508c38 commit b3c3aba

File tree

6 files changed

+76
-8
lines changed

6 files changed

+76
-8
lines changed

.gitignore

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,6 @@ models/**/*.csv
2424
# temporary ignore rule
2525
paper
2626
meeting-notes
27-
models/voronoi/*
28-
!models/voronoi/*.py
27+
models/voronoi/*.ipynb
28+
models/voronoi/*.zip
2929
pretrained

models/m3gnet/test_m3gnet.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@
6767

6868
# %%
6969
data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-cses+init-structs.json.bz2"
70-
print(f"Loading from {data_path=}")
70+
print(f"{data_path=}")
7171
df_wbm = pd.read_json(data_path).set_index("material_id")
7272

7373
df_this_job: pd.DataFrame = np.array_split(df_wbm, slurm_array_task_count)[

models/megnet/test_megnet.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353

5454
# %%
5555
data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-init-structs.json.bz2"
56-
print(f"Loading from {data_path=}")
56+
print(f"{data_path=}")
5757
df_wbm_structs = pd.read_json(data_path).set_index("material_id")
5858

5959
megnet_mp_e_form = load_model(model_name := "Eform_MP_2019")

models/voronoi/featurize_mp_wbm.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
module_dir = os.path.dirname(__file__)
2020

2121

22-
# data_path = f"{ROOT}/data/mp/2022-09-16-mp-computed-structure-entries.json.gz"
23-
data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-init-structs.json.bz2"
22+
data_path = f"{ROOT}/data/mp/2022-09-16-mp-computed-structure-entries.json.gz"
23+
# data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-init-structs.json.bz2"
2424
input_col = "initial_structure"
2525
data_name = "wbm" if "wbm" in data_path else "mp"
2626
slurm_array_task_count = 10
@@ -31,7 +31,7 @@
3131
job_name=job_name,
3232
partition="icelake-himem",
3333
account="LEE-SL3-CPU",
34-
time=(slurm_max_job_time := "5:0:0"),
34+
time=(slurm_max_job_time := "8:0:0"),
3535
array=f"1-{slurm_array_task_count}",
3636
log_dir=log_dir,
3737
)
@@ -45,12 +45,13 @@
4545
if os.path.isfile(out_path):
4646
raise SystemExit(f"{out_path = } already exists, exciting early")
4747

48+
print(f"{data_path=}")
4849
df = pd.read_json(data_path).set_index("material_id")
4950
df_this_job: pd.DataFrame = np.array_split(df, slurm_array_task_count)[
5051
slurm_array_task_id - 1
5152
]
5253

53-
if data_name == "mp":
54+
if data_name == "mp": # extract structure dicts from ComputedStructureEntry
5455
struct_dicts = [x["structure"] for x in df_this_job.entry]
5556
if data_name == "wbm":
5657
struct_dicts = df_this_job.initial_structure
models/voronoi/join_voronoi_features.py

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
# %%
from __future__ import annotations

import os
from datetime import datetime
from glob import glob

import pandas as pd
from tqdm import tqdm

__author__ = "Janosh Riebesell"
__date__ = "2022-08-16"

today = f"{datetime.now():%Y-%m-%d}"


# %%
# Collect the per-slurm-task Voronoi feature CSVs produced by featurize_mp_wbm.py
# and join them into a single file.
module_dir = os.path.dirname(__file__)
date = "2022-11-18"
glob_pattern = f"{date}-voronoi-features-wbm/voronoi-features-wbm-*.csv.bz2"
file_paths = sorted(glob(f"{module_dir}/{glob_pattern}"))
print(f"Found {len(file_paths):,} files for {glob_pattern = }")

# maps file path -> loaded dataframe so re-running the cell skips finished files
dfs: dict[str, pd.DataFrame] = {}


# %%
# 2022-08-16 tried multiprocessing.Pool() to load files in parallel but was somehow
# slower than serial loading
for file_path in tqdm(file_paths):
    if file_path in dfs:
        continue  # already loaded on a previous run of this cell
    try:
        # keep whole dataframe in memory
        dfs[file_path] = pd.read_csv(file_path).set_index("material_id")
    except FileNotFoundError:
        print(f"{file_path=} not found")


# %%
# stack all per-task frames into one; index stays material_id
df_features = pd.concat(dfs.values())

# sanity check: no feature column should have an excessive number of NaNs
assert df_features.isna().sum().max() <= 18


# %%
out_path = f"{module_dir}/{date}-voronoi-features-wbm.csv.bz2"
df_features.to_csv(out_path)

models/voronoi/readme.md

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Voronoi Tessellation with matminer featurization piped into `scikit-learn` Random Forest
2+
3+
## OOM errors during featurization
4+
5+
`multiprocessing` seems to be the cause of out-of-memory errors on large structures. Initially couldn't get the `matminer` `MultipleFeaturizer` to run without crashing even when running on small subsets of the data (1%) and setting `sbatch` flag `--mem 100G`:
6+
7+
```log
8+
MultipleFeaturizer: 28%|██▊ | 724/2575 [01:08<04:15, 7.25it/s]/var/spool/slurm/slurmd/job7401930/slurm_script: line 4: 2625851 Killed python
9+
slurmstepd: error: Detected 52 oom-kill event(s) in StepId=7401930.batch cgroup. Some of your processes may have been killed by the cgroup out-of-memory handler.
10+
4:00
11+
```
12+
13+
Saving tip came from [Alex Dunn via Slack](https://berkeleytheory.slack.com/archives/D03ULSTNRMX/p1668746161675349) to set `featurizer.set_n_jobs(1)`.
14+
15+
## Archive
16+
17+
Files in `2022-10-04-rhys-voronoi.zip` received from Rhys via [Slack](https://ml-physics.slack.com/archives/DD8GBBRLN/p1664929946687049). All originals before making any changes for this project.

0 commit comments

Comments
 (0)