|
3 | 3 | import warnings
|
4 | 4 | from datetime import datetime
|
5 | 5 |
|
6 |
| -import matminer.featurizers.composition as feat_comp |
7 |
| -import matminer.featurizers.structure as feat_struct |
8 | 6 | import numpy as np
|
9 | 7 | import pandas as pd
|
10 | 8 | import wandb
|
11 |
| -from matminer.featurizers.base import MultipleFeaturizer |
12 | 9 | from pymatgen.core import Structure
|
13 | 10 | from tqdm import tqdm
|
14 | 11 |
|
15 | 12 | from matbench_discovery import ROOT, as_dict_handler
|
16 | 13 | from matbench_discovery.slurm import slurm_submit
|
| 14 | +from models.voronoi import featurizer |
17 | 15 |
|
18 | 16 | today = f"{datetime.now():%Y-%m-%d}"
|
19 | 17 | module_dir = os.path.dirname(__file__)
|
20 | 18 |
|
21 | 19 |
|
22 |
| -data_path = f"{ROOT}/data/mp/2022-09-16-mp-computed-structure-entries.json.gz" |
23 |
| -# data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-init-structs.json.bz2" |
24 |
| -input_col = "initial_structure" |
25 |
| -data_name = "wbm" if "wbm" in data_path else "mp" |
| 20 | +data_name = "mp" # "mp" |
| 21 | +if data_name == "wbm": |
| 22 | + data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-init-structs.json.bz2" |
| 23 | + input_col = "initial_structure" |
| 24 | +elif data_name == "mp": |
| 25 | + data_path = f"{ROOT}/data/mp/2022-09-16-mp-computed-structure-entries.json.gz" |
| 26 | + input_col = "structure" |
| 27 | + |
26 | 28 | slurm_array_task_count = 10
|
27 | 29 | job_name = f"voronoi-features-{data_name}"
|
28 | 30 | log_dir = f"{module_dir}/{today}-{job_name}"
|
|
39 | 41 |
|
40 | 42 | # %%
|
41 | 43 | slurm_array_task_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0))
|
42 |
| -run_name = f"{job_name}-{slurm_array_task_id}" |
| 44 | +slurm_job_id = os.environ.get("SLURM_JOB_ID", "debug") |
| 45 | +run_name = f"{job_name}-{slurm_job_id}-{slurm_array_task_id}" |
43 | 46 | out_path = f"{log_dir}/{run_name}.csv.bz2"
|
44 | 47 |
|
45 | 48 | if os.path.isfile(out_path):
|
|
57 | 60 | struct_dicts = df_this_job.initial_structure
|
58 | 61 |
|
59 | 62 | df_this_job[input_col] = [
|
60 |
| - Structure.from_dict(x) for x in tqdm(df_this_job.initial_structure, disable=None) |
| 63 | + Structure.from_dict(x) for x in tqdm(struct_dicts, disable=None) |
61 | 64 | ]
|
62 | 65 |
|
63 | 66 |
|
|
79 | 82 | )
|
80 | 83 |
|
81 | 84 |
|
82 |
| -# %% Create the featurizer: Ward et al. use a variety of different featurizers |
83 |
| -# https://journals.aps.org/prb/abstract/10.1103/PhysRevB.96.024104 |
84 |
| -featurizers = [ |
85 |
| - feat_struct.SiteStatsFingerprint.from_preset("CoordinationNumber_ward-prb-2017"), |
86 |
| - feat_struct.StructuralHeterogeneity(), |
87 |
| - feat_struct.ChemicalOrdering(), |
88 |
| - feat_struct.MaximumPackingEfficiency(), |
89 |
| - feat_struct.SiteStatsFingerprint.from_preset( |
90 |
| - "LocalPropertyDifference_ward-prb-2017" |
91 |
| - ), |
92 |
| - feat_struct.StructureComposition(feat_comp.Stoichiometry()), |
93 |
| - feat_struct.StructureComposition(feat_comp.ElementProperty.from_preset("magpie")), |
94 |
| - feat_struct.StructureComposition(feat_comp.ValenceOrbital(props=["frac"])), |
95 |
| - feat_struct.StructureComposition(feat_comp.IonProperty(fast=True)), |
96 |
| -] |
97 |
| -featurizer = MultipleFeaturizer(featurizers) |
98 |
| -# multiprocessing seems to be the cause of OOM errors on large structures even when |
99 |
| -# taking only small slice of the data and launching slurm jobs with --mem 100G |
100 |
| -featurizer.set_n_jobs(1) |
101 |
| - |
102 |
| - |
103 | 85 | # %% prints lots of pymatgen warnings
|
104 | 86 | # > No electronegativity for Ne. Setting to NaN. This has no physical meaning, ...
|
105 | 87 | warnings.filterwarnings(action="ignore", category=UserWarning, module="pymatgen")
|
|
0 commit comments