|
10 | 10 | from sklearn.pipeline import Pipeline
|
11 | 11 |
|
12 | 12 | from matbench_discovery import DEBUG, ROOT, today
|
13 |
| -from matbench_discovery.plot_scripts import df_wbm |
| 13 | +from matbench_discovery.plot_scripts import df_wbm, glob_to_df |
14 | 14 | from matbench_discovery.plots import wandb_log_scatter
|
15 | 15 | from matbench_discovery.slurm import slurm_submit
|
16 | 16 | from models.voronoi import featurizer
|
|
41 | 41 |
|
42 | 42 |
|
43 | 43 | # %%
|
44 |
| -train_path = f"{module_dir}/2022-11-25-features-mp.csv.bz2" |
45 |
| -print(f"{train_path=}") |
46 |
| -df_train = pd.read_csv(train_path).set_index("material_id") |
| 44 | +train_path = f"{module_dir}/2022-11-25-features-mp/voronoi-features-mp-*.csv.bz2" |
| 45 | +df_train = glob_to_df(train_path).set_index("material_id") |
47 | 46 | print(f"{df_train.shape=}")
|
48 | 47 |
|
49 | 48 | mp_energies_path = f"{ROOT}/data/mp/2022-08-13-mp-energies.json.gz"
|
50 | 49 | df_mp = pd.read_json(mp_energies_path).set_index("material_id")
|
51 | 50 | train_target_col = "formation_energy_per_atom"
|
52 |
| -df_train[train_target_col] = df_mp[train_target_col] |
53 |
| - |
54 | 51 |
|
55 | 52 | test_path = f"{module_dir}/2022-11-18-features-wbm-{task_type}.csv.bz2"
|
56 |
| -print(f"{test_path=}") |
57 | 53 | df_test = pd.read_csv(test_path).set_index("material_id")
|
58 | 54 | print(f"{df_test.shape=}")
|
59 | 55 |
|
60 | 56 | test_target_col = "e_form_per_atom_mp2020_corrected"
|
61 |
| -df_test[test_target_col] = df_wbm[test_target_col] |
| 57 | + |
| 58 | + |
# Copy the regression target column onto each feature frame from its label
# source: train_target_col from df_mp onto df_train, and test_target_col from
# df_wbm onto df_test. pandas aligns the assigned Series on the frame's index
# (df_train/df_test are indexed by material_id above).
for df, df_tar, col in (
    (df_train, df_mp, train_target_col),
    (df_test, df_wbm, test_target_col),
):
    # Use the per-iteration column name. Hard-coding train_target_col here
    # would leave test_target_col unset on df_test and would look up the
    # training column on df_wbm instead.
    df[col] = df_tar[col]
    # Fail early if the label source itself has missing targets.
    # NOTE(review): this checks df_tar, not the aligned df[col]; rows of df
    # whose index is absent from df_tar would still be NaN — confirm intended.
    nans = df_tar[col].isna().sum()
    assert nans == 0, f"{nans} NaNs in {col} targets"
| 66 | + |
62 | 67 | model_name = "Voronoi RandomForestRegressor"
|
63 | 68 |
|
64 | 69 | run_params = dict(
|
|
0 commit comments