|
3 | 3 |
|
4 | 4 | from typing import Any
|
5 | 5 |
|
| 6 | +import numpy as np |
6 | 7 | import pandas as pd
|
7 | 8 | import requests
|
8 | 9 | import wandb
|
9 | 10 | import wandb.apis.public
|
10 | 11 | from pymatviz.utils import save_fig
|
11 |
| -from sklearn.metrics import f1_score, r2_score |
12 | 12 | from tqdm import tqdm
|
13 | 13 |
|
14 | 14 | from matbench_discovery import FIGS, MODELS, WANDB_PATH, today
|
15 | 15 | from matbench_discovery.data import PRED_FILENAMES, load_df_wbm_preds
|
| 16 | +from matbench_discovery.energy import stable_metrics |
16 | 17 | from matbench_discovery.plots import px
|
17 | 18 |
|
18 | 19 | __author__ = "Janosh Riebesell"
|
|
97 | 98 |
|
98 | 99 | n_gpu, n_cpu = metadata.get("gpu_count", 0), metadata.get("cpu_count", 0)
|
99 | 100 | model_stats[model] = {
|
100 |
| - "run_time_h": run_time_total / 3600, |
| 101 | + (time_col := "Run Time (h)"): run_time_total / 3600, |
101 | 102 | "GPU": n_gpu,
|
102 | 103 | "CPU": n_cpu,
|
103 |
| - "slurm_jobs": n_runs, |
| 104 | + "Slurm Jobs": n_runs, |
104 | 105 | }
|
105 | 106 |
|
106 | 107 |
|
|
110 | 111 | )
|
111 | 112 |
|
112 | 113 | df_metrics = pd.DataFrame(model_stats).T
|
| 114 | +df_metrics.index.name = "Model" |
113 | 115 | # on 2022-11-28:
|
114 | 116 | # run_times = {'Voronoi Random Forest': 739608,
|
115 | 117 | # 'Wrenformer': 208399,
|
|
121 | 123 | # %%
|
122 | 124 | df_wbm = load_df_wbm_preds(list(models))
|
123 | 125 | e_form_col = "e_form_per_atom_mp2020_corrected"
|
124 |
| -each_col = "e_above_hull_mp2020_corrected_ppd_mp" |
| 126 | +each_true_col = "e_above_hull_mp2020_corrected_ppd_mp" |
125 | 127 |
|
126 | 128 |
|
127 | 129 | # %%
|
128 | 130 | for model in models:
|
129 |
| - dct = {} |
130 |
| - e_above_hull_pred = df_wbm[model] - df_wbm[e_form_col] |
131 |
| - isna = e_above_hull_pred.isna() | df_wbm[each_col].isna() |
| 131 | + each_pred = df_wbm[each_true_col] + df_wbm[model] - df_wbm[e_form_col] |
132 | 132 |
|
133 |
| - dct["F1"] = f1_score(df_wbm[each_col] < 0, e_above_hull_pred < 0) |
134 |
| - dct["Precision"] = f1_score( |
135 |
| - df_wbm[each_col] < 0, e_above_hull_pred < 0, pos_label=True |
136 |
| - ) |
137 |
| - dct["Recall"] = f1_score( |
138 |
| - df_wbm[each_col] < 0, e_above_hull_pred < 0, pos_label=False |
139 |
| - ) |
140 |
| - |
141 |
| - dct["MAE"] = (e_above_hull_pred - df_wbm[each_col]).abs().mean() |
| 133 | + metrics = stable_metrics(df_wbm[each_true_col], each_pred) |
142 | 134 |
|
143 |
| - dct["RMSE"] = ((e_above_hull_pred - df_wbm[each_col]) ** 2).mean() ** 0.5 |
144 |
| - dct["R2"] = r2_score(df_wbm[each_col][~isna], e_above_hull_pred[~isna]) |
| 135 | + df_metrics.loc[model, list(metrics)] = metrics.values() |
145 | 136 |
|
146 |
| - df_metrics.loc[model, list(dct)] = dct.values() |
147 | 137 |
|
148 |
| - |
149 |
| -df_styled = df_metrics.style.format(precision=3).background_gradient( |
150 |
| - cmap="viridis", |
151 |
| - # gmap=np.log10(df_table) # for log scaled color map |
| 138 | +# %% |
| 139 | +df_styled = ( |
| 140 | + df_metrics.reset_index() |
| 141 | + .drop(columns=["GPU", "CPU", "Slurm Jobs"]) |
| 142 | + .style.format(precision=2) |
| 143 | + .background_gradient( |
| 144 | + cmap="viridis_r", # lower is better so reverse color map |
| 145 | + subset=["MAE", "RMSE", "FNR", "FPR"], |
| 146 | + ) |
| 147 | + .background_gradient( |
| 148 | + cmap="viridis_r", |
| 149 | + subset=[time_col], |
| 150 | + gmap=np.log10(df_metrics[time_col].to_numpy()), # for log scaled color map |
| 151 | + ) |
| 152 | + .background_gradient( |
| 153 | + cmap="viridis", # higher is better |
| 154 | + subset=["DAF", "R2", "Precision", "Recall", "F1", "Accuracy", "TPR", "TNR"], |
| 155 | + ) |
| 156 | + .hide(axis="index") |
152 | 157 | )
|
| 158 | +df_styled |
153 | 159 |
|
154 | 160 |
|
155 | 161 | # %% export model metrics as styled HTML table
|
156 | 162 | styles = {
|
157 | 163 | "": "font-family: sans-serif; border-collapse: collapse;",
|
158 |
| - "td, th": "border: 1px solid #ddd; text-align: left; padding: 8px;", |
| 164 | + "td, th": "border: 1px solid #ddd; text-align: left; padding: 8px; white-space: nowrap;", |
159 | 165 | }
|
160 | 166 | df_styled.set_table_styles([dict(selector=sel, props=styles[sel]) for sel in styles])
|
161 | 167 |
|
162 |
| -html_path = f"{FIGS}/{today}-metrics-table.html" |
163 |
| -# df_styled.to_html(html_path) |
| 168 | +html_path = f"{FIGS}/{today}-metrics-table.svelte" |
| 169 | +df_styled.to_html(html_path) |
164 | 170 |
|
165 | 171 |
|
166 | 172 | # %% write model metrics to json for use by the website
|
|
169 | 175 | f"{x / len(df_wbm):.2%}" for x in df_metrics.missing_preds
|
170 | 176 | ]
|
171 | 177 |
|
172 |
| -df_metrics.attrs["total_run_time"] = df_metrics.run_time.sum() |
| 178 | +df_metrics.attrs["Total Run Time"] = df_metrics[time_col].sum() |
173 | 179 |
|
174 | 180 | df_metrics.round(2).to_json(f"{MODELS}/{today}-model-stats.json", orient="index")
|
175 | 181 |
|
176 | 182 |
|
177 | 183 | # %% plot model run times as pie chart
|
178 | 184 | fig = px.pie(
|
179 |
| - df_metrics, values="run_time", names=df_metrics.index, hole=0.5 |
| 185 | + df_metrics, values=time_col, names=df_metrics.index, hole=0.5 |
180 | 186 | ).update_traces(
|
181 | 187 | textinfo="percent+label",
|
182 | 188 | textfont_size=14,
|
|
189 | 195 | )
|
190 | 196 | fig.add_annotation(
|
191 | 197 | # add title in the middle saying "Total CPU+GPU time used"
|
192 |
| - text=f"Total CPU+GPU<br>time used:<br>{df_metrics.run_time.sum():.1f} h", |
| 198 | + text=f"Total CPU+GPU<br>time used:<br>{df_metrics[time_col].sum():.1f} h", |
193 | 199 | font=dict(size=18),
|
194 | 200 | x=0.5,
|
195 | 201 | y=0.5,
|
196 | 202 | showarrow=False,
|
197 | 203 | )
|
198 | 204 | fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
|
199 | 205 |
|
| 206 | + |
| 207 | +# %% |
200 | 208 | save_fig(fig, f"{FIGS}/{today}-model-run-times-pie.svelte")
|
0 commit comments