|
3 | 3 | import warnings
|
4 | 4 | from datetime import datetime
|
5 | 5 |
|
| 6 | +import numpy as np |
6 | 7 | import pandas as pd
|
| 8 | +import wandb |
7 | 9 | from matminer.featurizers.base import MultipleFeaturizer
|
8 | 10 | from matminer.featurizers.composition import (
|
9 | 11 | ElementProperty,
|
|
32 | 34 | data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-init-structs.json.bz2"
|
33 | 35 | input_col = "structure"
|
34 | 36 | data_name = "wbm" if "wbm" in data_path else "mp"
|
| 37 | +slurm_array_task_count = 100 |
| 38 | +job_name = f"voronoi-featurize-{data_name}" |
35 | 39 |
|
36 |
| -slurm_submit_python( |
37 |
| - job_name=f"voronoi-featurize-{data_name}", |
| 40 | +slurm_vars = slurm_submit_python( |
| 41 | + job_name=job_name, |
38 | 42 | partition="icelake-himem",
|
39 | 43 | account="LEE-SL3-CPU",
|
40 |
| - time="3:0:0", |
| 44 | + time=(slurm_max_job_time := "3:0:0"), |
| 45 | + array=f"1-{slurm_array_task_count}", |
41 | 46 | log_dir=module_dir,
|
42 |
| - slurm_flags=("--mem=40G",), |
43 |
| -) |
44 |
| - |
45 |
| - |
46 |
| -# %% Create the featurizer: Ward et al. use a variety of different featurizers |
47 |
| -# https://journals.aps.org/prb/abstract/10.1103/PhysRevB.96.024104 |
48 |
| -featurizer = MultipleFeaturizer( |
49 |
| - [ |
50 |
| - SiteStatsFingerprint.from_preset("CoordinationNumber_ward-prb-2017"), |
51 |
| - StructuralHeterogeneity(), |
52 |
| - ChemicalOrdering(), |
53 |
| - MaximumPackingEfficiency(), |
54 |
| - SiteStatsFingerprint.from_preset("LocalPropertyDifference_ward-prb-2017"), |
55 |
| - StructureComposition(Stoichiometry()), |
56 |
| - StructureComposition(ElementProperty.from_preset("magpie")), |
57 |
| - StructureComposition(ValenceOrbital(props=["frac"])), |
58 |
| - StructureComposition(IonProperty(fast=True)), |
59 |
| - ], |
60 | 47 | )
|
61 | 48 |
|
62 | 49 |
|
63 | 50 | # %%
|
64 | 51 | df = pd.read_json(data_path).set_index("material_id")
|
65 | 52 |
|
| 53 | +slurm_array_task_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0)) |
| 54 | +df_this_job: pd.DataFrame = np.array_split(df, slurm_array_task_count)[ |
| 55 | + slurm_array_task_id - 1 |
| 56 | +] |
| 57 | + |
66 | 58 | if data_name == "mp":
|
67 |
| - struct_dicts = [x["structure"] for x in df.entry] |
| 59 | + struct_dicts = [x["structure"] for x in df_this_job.entry] |
68 | 60 | if data_name == "wbm":
|
69 |
| - struct_dicts = df.initial_structure |
| 61 | + struct_dicts = df_this_job.initial_structure |
| 62 | + |
| 63 | +df_this_job[input_col] = [ |
| 64 | + Structure.from_dict(x) for x in tqdm(df_this_job.initial_structure, disable=None) |
| 65 | +] |
70 | 66 |
|
71 |
| -df[input_col] = [ |
72 |
| - Structure.from_dict(x) for x in tqdm(df.initial_structure, disable=None) |
| 67 | + |
| 68 | +run_params = dict( |
| 69 | + data_path=data_path, |
| 70 | + slurm_max_job_time=slurm_max_job_time, |
| 71 | + **slurm_vars, |
| 72 | +) |
| 73 | +if wandb.run is None: |
| 74 | + wandb.login() |
| 75 | + |
| 76 | +wandb.init( |
| 77 | + project="matbench-discovery", |
| 78 | + name=f"{job_name}-{slurm_array_task_id}", |
| 79 | + config=run_params, |
| 80 | +) |
| 81 | + |
| 82 | + |
| 83 | +# %% Create the featurizer: Ward et al. use a variety of different featurizers |
| 84 | +# https://journals.aps.org/prb/abstract/10.1103/PhysRevB.96.024104 |
| 85 | +featurizers = [ |
| 86 | + SiteStatsFingerprint.from_preset("CoordinationNumber_ward-prb-2017"), |
| 87 | + StructuralHeterogeneity(), |
| 88 | + ChemicalOrdering(), |
| 89 | + MaximumPackingEfficiency(), |
| 90 | + SiteStatsFingerprint.from_preset("LocalPropertyDifference_ward-prb-2017"), |
| 91 | + StructureComposition(Stoichiometry()), |
| 92 | + StructureComposition(ElementProperty.from_preset("magpie")), |
| 93 | + StructureComposition(ValenceOrbital(props=["frac"])), |
| 94 | + StructureComposition(IonProperty(fast=True)), |
73 | 95 | ]
|
| 96 | +featurizer = MultipleFeaturizer(featurizers) |
74 | 97 |
|
75 | 98 |
|
76 | 99 | # %% prints lots of pymatgen warnings
|
77 | 100 | # > No electronegativity for Ne. Setting to NaN. This has no physical meaning, ...
|
78 | 101 | warnings.filterwarnings(action="ignore", category=UserWarning, module="pymatgen")
|
79 | 102 |
|
80 | 103 | df_features = featurizer.featurize_dataframe(
|
81 |
| - df, input_col, ignore_errors=True, pbar=True |
| 104 | + df_this_job, input_col, ignore_errors=True, pbar=True |
82 | 105 | )
|
83 | 106 |
|
84 | 107 |
|
|
0 commit comments