|
3 | 3 | import warnings
|
4 | 4 | from datetime import datetime
|
5 | 5 |
|
| 6 | +import matminer.featurizers.composition as feat_comp |
| 7 | +import matminer.featurizers.structure as feat_struct |
6 | 8 | import numpy as np
|
7 | 9 | import pandas as pd
|
8 | 10 | import wandb
|
9 | 11 | from matminer.featurizers.base import MultipleFeaturizer
|
10 |
| -from matminer.featurizers.composition import ( |
11 |
| - ElementProperty, |
12 |
| - IonProperty, |
13 |
| - Stoichiometry, |
14 |
| - ValenceOrbital, |
15 |
| -) |
16 |
| -from matminer.featurizers.structure import ( |
17 |
| - ChemicalOrdering, |
18 |
| - MaximumPackingEfficiency, |
19 |
| - SiteStatsFingerprint, |
20 |
| - StructuralHeterogeneity, |
21 |
| - StructureComposition, |
22 |
| -) |
23 | 12 | from pymatgen.core import Structure
|
24 | 13 | from tqdm import tqdm
|
25 | 14 |
|
26 |
| -from matbench_discovery import ROOT |
27 |
| -from matbench_discovery.slurm import slurm_submit_python |
| 15 | +from matbench_discovery import ROOT, as_dict_handler |
| 16 | +from matbench_discovery.slurm import slurm_submit |
28 | 17 |
|
29 | 18 | today = f"{datetime.now():%Y-%m-%d}"
|
30 | 19 | module_dir = os.path.dirname(__file__)
|
31 | 20 |
|
32 | 21 |
|
33 | 22 | # data_path = f"{ROOT}/data/mp/2022-09-16-mp-computed-structure-entries.json.gz"
|
34 | 23 | data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-init-structs.json.bz2"
|
35 |
| -input_col = "structure" |
| 24 | +input_col = "initial_structure" |
36 | 25 | data_name = "wbm" if "wbm" in data_path else "mp"
|
37 |
| -slurm_array_task_count = 100 |
| 26 | +slurm_array_task_count = 20 |
38 | 27 | job_name = f"voronoi-featurize-{data_name}"
|
39 | 28 |
|
40 |
| -slurm_vars = slurm_submit_python( |
| 29 | +slurm_vars = slurm_submit( |
41 | 30 | job_name=job_name,
|
42 | 31 | partition="icelake-himem",
|
43 | 32 | account="LEE-SL3-CPU",
|
44 |
| - time=(slurm_max_job_time := "3:0:0"), |
| 33 | + time=(slurm_max_job_time := "5:0:0"), |
45 | 34 | array=f"1-{slurm_array_task_count}",
|
46 | 35 | log_dir=f"{module_dir}/{job_name}",
|
47 | 36 | )
|
|
51 | 40 | df = pd.read_json(data_path).set_index("material_id")
|
52 | 41 |
|
53 | 42 | slurm_array_task_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0))
|
| 43 | +run_name = f"{job_name}-{slurm_array_task_id}" |
| 44 | + |
54 | 45 | df_this_job: pd.DataFrame = np.array_split(df, slurm_array_task_count)[
|
55 | 46 | slurm_array_task_id - 1
|
56 | 47 | ]
|
|
69 | 60 | data_path=data_path,
|
70 | 61 | slurm_max_job_time=slurm_max_job_time,
|
71 | 62 | df=dict(shape=str(df_this_job.shape), columns=", ".join(df_this_job)),
|
72 |
| - **slurm_vars, |
| 63 | + input_col=input_col, |
| 64 | + slurm_vars=slurm_vars, |
73 | 65 | )
|
74 | 66 | if wandb.run is None:
|
75 | 67 | wandb.login()
|
76 | 68 |
|
77 | 69 | wandb.init(
|
78 | 70 | project="matbench-discovery",
|
79 |
| - name=f"{job_name}-{slurm_array_task_id}", |
| 71 | + name=run_name, |
80 | 72 | config=run_params,
|
81 | 73 | )
|
82 | 74 |
|
83 | 75 |
|
84 | 76 | # %% Create the featurizer: Ward et al. use a variety of different featurizers
|
85 | 77 | # https://journals.aps.org/prb/abstract/10.1103/PhysRevB.96.024104
|
86 | 78 | featurizers = [
|
87 |
| - SiteStatsFingerprint.from_preset("CoordinationNumber_ward-prb-2017"), |
88 |
| - StructuralHeterogeneity(), |
89 |
| - ChemicalOrdering(), |
90 |
| - MaximumPackingEfficiency(), |
91 |
| - SiteStatsFingerprint.from_preset("LocalPropertyDifference_ward-prb-2017"), |
92 |
| - StructureComposition(Stoichiometry()), |
93 |
| - StructureComposition(ElementProperty.from_preset("magpie")), |
94 |
| - StructureComposition(ValenceOrbital(props=["frac"])), |
95 |
| - StructureComposition(IonProperty(fast=True)), |
| 79 | + feat_struct.SiteStatsFingerprint.from_preset("CoordinationNumber_ward-prb-2017"), |
| 80 | + feat_struct.StructuralHeterogeneity(), |
| 81 | + feat_struct.ChemicalOrdering(), |
| 82 | + feat_struct.MaximumPackingEfficiency(), |
| 83 | + feat_struct.SiteStatsFingerprint.from_preset( |
| 84 | + "LocalPropertyDifference_ward-prb-2017" |
| 85 | + ), |
| 86 | + feat_struct.StructureComposition(feat_comp.Stoichiometry()), |
| 87 | + feat_struct.StructureComposition(feat_comp.ElementProperty.from_preset("magpie")), |
| 88 | + feat_struct.StructureComposition(feat_comp.ValenceOrbital(props=["frac"])), |
| 89 | + feat_struct.StructureComposition(feat_comp.IonProperty(fast=True)), |
96 | 90 | ]
|
97 | 91 | featurizer = MultipleFeaturizer(featurizers)
|
98 | 92 |
|
|
108 | 102 |
|
109 | 103 | # %%
|
110 | 104 | df_features.to_json(
|
111 |
| - f"{module_dir}/{today}-voronoi-tesselation-{data_name}-features.json.gz" |
| 105 | + f"{module_dir}/{today}-{run_name}.json.gz", default_handler=as_dict_handler |
112 | 106 | )
|
0 commit comments