|
1 | 1 | # %%
|
2 | 2 | import itertools
|
3 | 3 | import subprocess
|
| 4 | +from datetime import date |
4 | 5 | from glob import glob
|
5 | 6 |
|
6 | 7 | import numpy as np
|
|
13 | 14 |
|
14 | 15 | from matbench_discovery import DATA_DIR, PDF_FIGS, ROOT, SCRIPTS, SITE_FIGS
|
15 | 16 | from matbench_discovery.data import DataFiles, df_wbm
|
16 |
| -from matbench_discovery.enums import MbdKey, Open |
| 17 | +from matbench_discovery.enums import MbdKey, Open, Targets |
17 | 18 | from matbench_discovery.metrics import stable_metrics
|
18 | 19 | from matbench_discovery.models import MODEL_METADATA
|
19 | 20 | from matbench_discovery.preds import df_metrics, df_metrics_10k, df_metrics_uniq_protos
|
|
54 | 55 | model_metadata = MODEL_METADATA.get(model_name, {})
|
55 | 56 | model_key = model_metadata.get("model_key", model_name)
|
56 | 57 |
|
57 |
| - df_metrics_uniq_protos.loc[date_added_col, model] = model_metadata.get( |
58 |
| - "date_added", "" |
| 58 | + date_added = model_metadata.get("date_added", "") |
| 59 | + # long format date for tooltip, e.g. Monday, 28 November 2022 |
| 60 | + title = f"{date.fromisoformat(date_added):%A, %d %B %Y}" |
| 61 | + df_metrics_uniq_protos.loc[date_added_col, model] = ( |
| 62 | + f"<span {title=}>{date_added}</span>" |
| 63 | + ) |
| 64 | + |
| 65 | + # Update targets column with full label in tooltip |
| 66 | + model_targets = model_metadata.get(Key.targets, "") |
| 67 | + targets_label = Targets[model_targets].label |
| 68 | + df_metrics_uniq_protos.loc[Key.targets.label, model] = ( |
| 69 | + f'<span title="{targets_label}">{model_targets}</span>' |
59 | 70 | )
|
60 | 71 |
|
61 | 72 | # Add model version as hover tooltip to model name
|
|
103 | 114 | else:
|
104 | 115 | tooltip_lines += [f"{title}: {si_fmt(n_materials)} materials"]
|
105 | 116 |
|
106 |
| - title = "Number of materials in training set" |
| 117 | + title = f"Number of materials in training set = {n_materials_total:,}" |
107 | 118 | train_size_str = (
|
108 | 119 | f"<span {title=} data-sort-value={n_materials_total}>"
|
109 | 120 | f"{si_fmt(n_materials_total, fmt='.0f')}</span>"
|
110 | 121 | )
|
111 | 122 |
|
112 | 123 | if n_materials_total != n_structs_total:
|
113 | 124 | title = (
|
114 |
| - "Number of materials in training set
In parenthesis=number of " |
115 |
| - "structures, usually from multiple DFT relaxation frames per material" |
| 125 | + f"Number of materials in training set = {n_materials_total:,}
In " |
| 126 | + f"parenthesis is number of structures = {n_structs_total}, usually " |
| 127 | + "from multiple DFT relaxation frames per material" |
116 | 128 | )
|
117 | 129 | train_size_str = (
|
118 | 130 | f"<span {title=} data-sort-value={n_materials_total}>"
|
|
145 | 157 |
|
146 | 158 | model_params = model_metadata.get(Key.model_params, 0)
|
147 | 159 | n_estimators = model_metadata.get(Key.n_estimators, -1)
|
148 |
| - title = "Number of models in ensemble" |
| 160 | + title = f"Number of models in ensemble = {n_estimators:,}" |
149 | 161 | n_estimators_str = (
|
150 | 162 | f" <small {title=}>(N={n_estimators})</small>" if n_estimators > 1 else ""
|
151 | 163 | )
|
152 | 164 |
|
153 |
| - title = "Number of trainable model parameters" |
| 165 | + title = f"Number of trainable model parameters = {model_params:,}" |
154 | 166 | formatted_params = si_fmt(model_params)
|
155 | 167 | df_metrics_uniq_protos.loc[Key.model_params.label.replace("eter", ""), model] = (
|
156 | 168 | f'<span {title=} data-sort-value="{model_params}">{formatted_params}'
|
157 | 169 | f"</span>{n_estimators_str}"
|
158 | 170 | )
|
159 | 171 |
|
160 |
| - for key in ( |
161 |
| - MbdKey.openness, |
162 |
| - Key.train_task, |
163 |
| - Key.test_task, |
164 |
| - Key.targets, |
165 |
| - ): |
| 172 | + for key in (MbdKey.openness, Key.train_task, Key.test_task): |
166 | 173 | default = {MbdKey.openness: Open.OSOD}.get(key, pd.NA)
|
167 | 174 | df_metrics_uniq_protos.loc[key.label, model] = model_metadata.get(key, default)
|
168 | 175 |
|
|
0 commit comments